legup-4.0
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
X86ISelLowering.cpp File Reference

Go to the source code of this file.

Data Structures

struct  IntrinsicData
 

Macros

#define DEBUG_TYPE   "x86-isel"
 

Enumerations

enum  StructReturnType { NotStructReturn, RegStructReturn, StackStructReturn }
 
enum  IntrinsicType {
  GATHER, SCATTER, PREFETCH, RDSEED,
  RDRAND, RDPMC, RDTSC, XTEST
}
 

Functions

 STATISTIC (NumTailCalls,"Number of tail calls")
 
static SDValue getMOVL (SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1, SDValue V2)
 
static SDValue ExtractSubVector (SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl, unsigned vectorWidth)
 
static SDValue Extract128BitVector (SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
 
static SDValue Extract256BitVector (SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
 Generate a DAG to grab 256-bits from a 512-bit vector. More...
 
static SDValue InsertSubVector (SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl, unsigned vectorWidth)
 
static SDValue Insert128BitVector (SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
 
static SDValue Insert256BitVector (SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
 
static SDValue Concat128BitVectors (SDValue V1, SDValue V2, EVT VT, unsigned NumElems, SelectionDAG &DAG, SDLoc dl)
 
static SDValue Concat256BitVectors (SDValue V1, SDValue V2, EVT VT, unsigned NumElems, SelectionDAG &DAG, SDLoc dl)
 
static TargetLoweringObjectFile * createTLOF (const Triple &TT)
 
static void getMaxByValAlign (Type *Ty, unsigned &MaxAlign)
 
static StructReturnType callIsStructReturn (const SmallVectorImpl< ISD::OutputArg > &Outs)
 
static StructReturnType argsAreStructReturn (const SmallVectorImpl< ISD::InputArg > &Ins)
 
static SDValue CreateCopyOfByValArgument (SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, SDLoc dl)
 
static bool IsTailCallConvention (CallingConv::ID CC)
 
static bool IsCCallConvention (CallingConv::ID CC)
 Return true if the calling convention is a C calling convention. More...
 
static bool FuncIsMadeTailCallSafe (CallingConv::ID CC, bool GuaranteedTailCallOpt)
 
static SDValue EmitTailCallStoreRetAddr (SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT, unsigned SlotSize, int FPDiff, SDLoc dl)
 
static bool MatchingStackOffset (SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, const X86InstrInfo *TII)
 
static bool MayFoldLoad (SDValue Op)
 
static bool MayFoldIntoStore (SDValue Op)
 
static bool isTargetShuffle (unsigned Opcode)
 
static SDValue getTargetShuffleNode (unsigned Opc, SDLoc dl, EVT VT, SDValue V1, SelectionDAG &DAG)
 
static SDValue getTargetShuffleNode (unsigned Opc, SDLoc dl, EVT VT, SDValue V1, unsigned TargetMask, SelectionDAG &DAG)
 
static SDValue getTargetShuffleNode (unsigned Opc, SDLoc dl, EVT VT, SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG)
 
static SDValue getTargetShuffleNode (unsigned Opc, SDLoc dl, EVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
 
static bool isX86CCUnsigned (unsigned X86CC)
 Return true if the condition is an unsigned comparison operation. More...
 
static unsigned TranslateX86CC (ISD::CondCode SetCCOpcode, bool isFP, SDValue &LHS, SDValue &RHS, SelectionDAG &DAG)
 
static bool hasFPCMov (unsigned X86CC)
 
static bool isUndefOrInRange (int Val, int Low, int Hi)
 
static bool isUndefOrEqual (int Val, int CmpVal)
 
static bool isSequentialOrUndefInRange (ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low)
 
static bool isPSHUFDMask (ArrayRef< int > Mask, MVT VT)
 
static bool isPSHUFHWMask (ArrayRef< int > Mask, MVT VT, bool HasInt256)
 
static bool isPSHUFLWMask (ArrayRef< int > Mask, MVT VT, bool HasInt256)
 
static bool isPALIGNRMask (ArrayRef< int > Mask, MVT VT, const X86Subtarget *Subtarget)
 
static void CommuteVectorShuffleMask (SmallVectorImpl< int > &Mask, unsigned NumElems)
 
static bool isSHUFPMask (ArrayRef< int > Mask, MVT VT, bool Commuted=false)
 
static bool isMOVHLPSMask (ArrayRef< int > Mask, MVT VT)
 
static bool isMOVHLPS_v_undef_Mask (ArrayRef< int > Mask, MVT VT)
 
static bool isMOVLPMask (ArrayRef< int > Mask, MVT VT)
 
static bool isMOVLHPSMask (ArrayRef< int > Mask, MVT VT)
 
static bool isINSERTPSMask (ArrayRef< int > Mask, MVT VT)
 
static SDValue Compact8x32ShuffleNode (ShuffleVectorSDNode *SVOp, SelectionDAG &DAG)
 
static bool isUNPCKLMask (ArrayRef< int > Mask, MVT VT, bool HasInt256, bool V2IsSplat=false)
 
static bool isUNPCKHMask (ArrayRef< int > Mask, MVT VT, bool HasInt256, bool V2IsSplat=false)
 
static bool isUNPCKL_v_undef_Mask (ArrayRef< int > Mask, MVT VT, bool HasInt256)
 
static bool isUNPCKH_v_undef_Mask (ArrayRef< int > Mask, MVT VT, bool HasInt256)
 
static bool isINSERT64x4Mask (ArrayRef< int > Mask, MVT VT, unsigned int *Imm)
 
static bool isMOVLMask (ArrayRef< int > Mask, EVT VT)
 
static bool isVPERM2X128Mask (ArrayRef< int > Mask, MVT VT, bool HasFp256)
 
static unsigned getShuffleVPERM2X128Immediate (ShuffleVectorSDNode *SVOp)
 
static bool isPermImmMask (ArrayRef< int > Mask, MVT VT, unsigned &Imm8)
 
static bool isVPERMILPMask (ArrayRef< int > Mask, MVT VT)
 
static bool isCommutedMOVLMask (ArrayRef< int > Mask, MVT VT, bool V2IsSplat=false, bool V2IsUndef=false)
 
static bool isMOVSHDUPMask (ArrayRef< int > Mask, MVT VT, const X86Subtarget *Subtarget)
 
static bool isMOVSLDUPMask (ArrayRef< int > Mask, MVT VT, const X86Subtarget *Subtarget)
 
static bool isMOVDDUPYMask (ArrayRef< int > Mask, MVT VT, bool HasFp256)
 
static bool isMOVDDUPMask (ArrayRef< int > Mask, MVT VT)
 
static bool isVEXTRACTIndex (SDNode *N, unsigned vecWidth)
 
static bool isVINSERTIndex (SDNode *N, unsigned vecWidth)
 
static unsigned getShuffleSHUFImmediate (ShuffleVectorSDNode *N)
 
static unsigned getShufflePSHUFHWImmediate (ShuffleVectorSDNode *N)
 
static unsigned getShufflePSHUFLWImmediate (ShuffleVectorSDNode *N)
 
static unsigned getShufflePALIGNRImmediate (ShuffleVectorSDNode *SVOp)
 
static unsigned getExtractVEXTRACTImmediate (SDNode *N, unsigned vecWidth)
 
static unsigned getInsertVINSERTImmediate (SDNode *N, unsigned vecWidth)
 
static bool isZero (SDValue V)
 isZero - Returns true if Elt is a constant integer zero More...
 
static bool ShouldXformToMOVHLPS (ArrayRef< int > Mask, MVT VT)
 
static bool isScalarLoadToVector (SDNode *N, LoadSDNode **LD=nullptr)
 
static bool WillBeConstantPoolLoad (SDNode *N)
 
static bool ShouldXformToMOVLP (SDNode *V1, SDNode *V2, ArrayRef< int > Mask, MVT VT)
 
static bool isZeroShuffle (ShuffleVectorSDNode *N)
 
static SDValue getZeroVector (EVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG, SDLoc dl)
 
static SDValue getOnesVector (MVT VT, bool HasInt256, SelectionDAG &DAG, SDLoc dl)
 
static void NormalizeMask (SmallVectorImpl< int > &Mask, unsigned NumElems)
 
static SDValue getUnpackl (SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, SDValue V2)
 getUnpackl - Returns a vector_shuffle node for an unpackl operation. More...
 
static SDValue getUnpackh (SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, SDValue V2)
 getUnpackh - Returns a vector_shuffle node for an unpackh operation. More...
 
static SDValue PromoteSplati8i16 (SDValue V, SelectionDAG &DAG, int &EltNo)
 
static SDValue getLegalSplat (SelectionDAG &DAG, SDValue V, int EltNo)
 getLegalSplat - Generate a legal splat with supported x86 shuffles More...
 
static SDValue PromoteSplat (ShuffleVectorSDNode *SV, SelectionDAG &DAG)
 PromoteSplat - Splat is promoted to target supported vector shuffles. More...
 
static SDValue getShuffleVectorZeroOrUndef (SDValue V2, unsigned Idx, bool IsZero, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static bool getTargetShuffleMask (SDNode *N, MVT VT, SmallVectorImpl< int > &Mask, bool &IsUnary)
 
static SDValue getShuffleScalarElt (SDNode *N, unsigned Index, SelectionDAG &DAG, unsigned Depth)
 
static unsigned getNumOfConsecutiveZeros (ShuffleVectorSDNode *SVOp, unsigned NumElems, bool ZerosFromLeft, SelectionDAG &DAG, unsigned PreferredNum=-1U)
 
static bool isShuffleMaskConsecutive (ShuffleVectorSDNode *SVOp, unsigned MaskI, unsigned MaskE, unsigned OpIdx, unsigned NumElems, unsigned &OpNum)
 
static bool isVectorShiftRight (ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt)
 
static bool isVectorShiftLeft (ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt)
 
static bool isVectorShift (ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt)
 
static SDValue LowerBuildVectorv16i8 (SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, const X86Subtarget *Subtarget, const TargetLowering &TLI)
 
static SDValue LowerBuildVectorv8i16 (SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, const X86Subtarget *Subtarget, const TargetLowering &TLI)
 
static SDValue LowerBuildVectorv4x32 (SDValue Op, unsigned NumElems, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, const X86Subtarget *Subtarget, const TargetLowering &TLI)
 LowerBuildVectorv4x32 - Custom lower build_vector of v4i32 or v4f32. More...
 
static SDValue getVShift (bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, SelectionDAG &DAG, const TargetLowering &TLI, SDLoc dl)
 
static SDValue LowerAsSplatVectorLoad (SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG)
 
static SDValue EltsFromConsecutiveLoads (EVT VT, SmallVectorImpl< SDValue > &Elts, SDLoc &DL, SelectionDAG &DAG, bool isAfterLegalize)
 
static SDValue LowerVectorBroadcast (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static int getUnderlyingExtractedFromVec (SDValue &ExtractedFromVec, SDValue ExtIdx)
 For an EXTRACT_VECTOR_ELT with a constant index return the real underlying vector and index. More...
 
static SDValue buildFromShuffleMostly (SDValue Op, SelectionDAG &DAG)
 
static bool isHorizontalBinOp (const BuildVectorSDNode *N, unsigned Opcode, SelectionDAG &DAG, unsigned BaseIdx, unsigned LastIdx, SDValue &V0, SDValue &V1)
 Return true if N implements a horizontal binop and return the operands for the horizontal binop into V0 and V1. More...
 
static SDValue ExpandHorizontalBinOp (const SDValue &V0, const SDValue &V1, SDLoc DL, SelectionDAG &DAG, unsigned X86Opcode, bool Mode, bool isUndefLO, bool isUndefHI)
 Emit a sequence of two 128-bit horizontal add/sub followed by a concat_vector. More...
 
static SDValue matchAddSub (const BuildVectorSDNode *BV, SelectionDAG &DAG, const X86Subtarget *Subtarget)
 Try to fold a build_vector that performs an 'addsub' into the sequence of 'vadd + vsub + blendi'. More...
 
static SDValue PerformBUILD_VECTORCombine (SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget)
 
static SDValue LowerAVXCONCAT_VECTORS (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerCONCAT_VECTORS (SDValue Op, SelectionDAG &DAG)
 
static bool isNoopShuffleMask (ArrayRef< int > Mask)
 Tiny helper function to identify a no-op mask. More...
 
static bool isSingleInputShuffleMask (ArrayRef< int > Mask)
 Helper function to classify a mask as a single-input mask. More...
 
static SDValue getV4X86ShuffleImm8ForMask (ArrayRef< int > Mask, SelectionDAG &DAG)
 Get a 4-lane 8-bit shuffle immediate for a mask. More...
 
static SDValue lowerV2F64VectorShuffle (SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 Handle lowering of 2-lane 64-bit floating point shuffles. More...
 
static SDValue lowerV2I64VectorShuffle (SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 Handle lowering of 2-lane 64-bit integer shuffles. More...
 
static SDValue lowerV4F32VectorShuffle (SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 Lower 4-lane 32-bit floating point shuffles. More...
 
static SDValue lowerV4I32VectorShuffle (SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 Lower 4-lane i32 vector shuffles. More...
 
static SDValue lowerV8I16SingleInputVectorShuffle (SDLoc DL, SDValue V, MutableArrayRef< int > Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 Lowering of single-input v8i16 shuffles is the cornerstone of SSE2 shuffle lowering, and the most complex part. More...
 
static bool shouldLowerAsInterleaving (ArrayRef< int > Mask)
 Detect whether the mask pattern should be lowered through interleaving. More...
 
static SDValue lowerV8I16BasicBlendVectorShuffle (SDLoc DL, SDValue V1, SDValue V2, MutableArrayRef< int > Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 Blend two v8i16 vectors using a naive unpack strategy. More...
 
static SDValue lowerV8I16VectorShuffle (SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 Generic lowering of 8-lane i16 shuffles. More...
 
static SDValue lowerV16I8VectorShuffle (SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 Generic lowering of v16i8 shuffles. More...
 
static SDValue lower128BitVectorShuffle (SDValue Op, SDValue V1, SDValue V2, MVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 Dispatching routine to lower various 128-bit x86 vector shuffles. More...
 
static bool areAdjacentMasksSequential (ArrayRef< int > Mask)
 Tiny helper function to test whether adjacent masks are sequential. More...
 
static SDValue lowerVectorShuffle (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 Top-level lowering for x86 vector shuffles. More...
 
static bool isBlendMask (ArrayRef< int > MaskVals, MVT VT, bool hasSSE41, bool hasInt256, unsigned *MaskOut=nullptr)
 
static SDValue LowerVECTOR_SHUFFLEtoBlend (ShuffleVectorSDNode *SVOp, unsigned MaskValue, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static bool ShuffleCrosses128bitLane (MVT VT, unsigned InputIdx, unsigned OutputIdx)
 
static SDValue getPSHUFB (ArrayRef< int > MaskVals, SDValue V1, SDLoc &dl, SelectionDAG &DAG)
 
static SDValue LowerVECTOR_SHUFFLEv8i16 (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static SDValue LowerVECTOR_SHUFFLEv16i16 (SDValue Op, SelectionDAG &DAG)
 v16i16 shuffles More...
 
static SDValue LowerVECTOR_SHUFFLEv16i8 (ShuffleVectorSDNode *SVOp, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static SDValue LowerVECTOR_SHUFFLEv32i8 (ShuffleVectorSDNode *SVOp, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static SDValue RewriteAsNarrowerShuffle (ShuffleVectorSDNode *SVOp, SelectionDAG &DAG)
 
static SDValue getVZextMovL (MVT VT, MVT OpVT, SDValue SrcOp, SelectionDAG &DAG, const X86Subtarget *Subtarget, SDLoc dl)
 
static SDValue LowerVECTOR_SHUFFLE_256 (ShuffleVectorSDNode *SVOp, SelectionDAG &DAG)
 
static SDValue LowerVECTOR_SHUFFLE_128v4 (ShuffleVectorSDNode *SVOp, SelectionDAG &DAG)
 
static bool MayFoldVectorLoad (SDValue V)
 
static SDValue getMOVDDup (SDValue &Op, SDLoc &dl, SDValue V1, SelectionDAG &DAG)
 
static SDValue getMOVLowToHigh (SDValue &Op, SDLoc &dl, SelectionDAG &DAG, bool HasSSE2)
 
static SDValue getMOVHighToLow (SDValue &Op, SDLoc &dl, SelectionDAG &DAG)
 
static SDValue getMOVLP (SDValue &Op, SDLoc &dl, SelectionDAG &DAG, bool HasSSE2)
 
static SDValue NarrowVectorLoadToElement (LoadSDNode *Load, unsigned Index, SelectionDAG &DAG)
 
static SDValue getINSERTPS (ShuffleVectorSDNode *SVOp, SDLoc &dl, SelectionDAG &DAG)
 
static SDValue LowerVectorIntExtend (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static SDValue NormalizeVectorShuffle (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static bool BUILD_VECTORtoBlendMask (BuildVectorSDNode *BuildVector, unsigned &MaskValue)
 
static SDValue LowerVSELECTtoBlend (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static SDValue LowerEXTRACT_VECTOR_ELT_SSE4 (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerINSERT_VECTOR_ELT_SSE4 (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerSCALAR_TO_VECTOR (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerEXTRACT_SUBVECTOR (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static SDValue LowerINSERT_SUBVECTOR (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static SDValue GetTLSADDR (SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg, unsigned char OperandFlags, bool LocalDynamic=false)
 
static SDValue LowerToTLSGeneralDynamicModel32 (GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT)
 
static SDValue LowerToTLSGeneralDynamicModel64 (GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT)
 
static SDValue LowerToTLSLocalDynamicModel (GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT, bool is64Bit)
 
static SDValue LowerToTLSExecModel (GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT, TLSModel::Model model, bool is64Bit, bool isPIC)
 
static SDValue LowerShiftParts (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerAVXExtend (SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget)
 
static SDValue LowerZERO_EXTEND_AVX512 (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerANY_EXTEND (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static SDValue LowerZERO_EXTEND (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static SDValue LowerFP_EXTEND (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerFABS (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerFNEG (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerFCOPYSIGN (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerFGETSIGN (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerVectorAllZeroTest (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static bool hasNonFlagsUse (SDValue Op)
 return true if Op has a use that doesn't just read flags. More...
 
static bool isAllOnes (SDValue V)
 
static int translateX86FSETCC (ISD::CondCode SetCCOpcode, SDValue &Op0, SDValue &Op1)
 
 Turns an ISD::CondCode into a value suitable for SSE floating point mask CMPs. More...
 
static SDValue Lower256IntVSETCC (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerIntVSETCC_AVX512 (SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget)
 
static SDValue ChangeVSETULTtoVSETULE (SDLoc dl, SDValue Op1, SelectionDAG &DAG)
 Try to turn a VSETULT into a VSETULE by modifying its second operand Op1. If non-trivial (for example because it's not constant) return an empty value. More...
 
static SDValue LowerVSETCC (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static bool isX86LogicalCmp (SDValue Op)
 
static bool isTruncWithZeroHighBitsInput (SDValue V, SelectionDAG &DAG)
 
static SDValue LowerSIGN_EXTEND_AVX512 (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerSIGN_EXTEND (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static bool isAndOrOfSetCCs (SDValue Op, unsigned &Opc)
 
static bool isXor1OfSetCC (SDValue Op)
 
static SDValue LowerVACOPY (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static SDValue getTargetVShiftByConstNode (unsigned Opc, SDLoc dl, MVT VT, SDValue SrcOp, uint64_t ShiftAmt, SelectionDAG &DAG)
 
static SDValue getTargetVShiftNode (unsigned Opc, SDLoc dl, MVT VT, SDValue SrcOp, SDValue ShAmt, SelectionDAG &DAG)
 
static SDValue LowerINTRINSIC_WO_CHAIN (SDValue Op, SelectionDAG &DAG)
 
static SDValue getGatherNode (unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget *Subtarget)
 
static SDValue getScatterNode (unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain)
 
static SDValue getPrefetchNode (unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain)
 
static void getReadPerformanceCounter (SDNode *N, SDLoc DL, SelectionDAG &DAG, const X86Subtarget *Subtarget, SmallVectorImpl< SDValue > &Results)
 
static void getReadTimeStampCounter (SDNode *N, SDLoc DL, unsigned Opcode, SelectionDAG &DAG, const X86Subtarget *Subtarget, SmallVectorImpl< SDValue > &Results)
 
static SDValue LowerREADCYCLECOUNTER (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static void InitIntinsicsMap ()
 
static SDValue LowerINTRINSIC_W_CHAIN (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static SDValue LowerADJUST_TRAMPOLINE (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerCTLZ (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerCTLZ_ZERO_UNDEF (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerCTTZ (SDValue Op, SelectionDAG &DAG)
 
static SDValue Lower256IntArith (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerADD (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerSUB (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerMUL (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static SDValue LowerMUL_LOHI (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static SDValue LowerScalarImmediateShift (SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget)
 
static SDValue LowerScalarVariableShift (SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget)
 
static SDValue LowerShift (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static SDValue LowerXALUO (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerATOMIC_FENCE (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static SDValue LowerCMP_SWAP (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static SDValue LowerBITCAST (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static SDValue LowerLOAD_SUB (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerATOMIC_STORE (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerADDC_ADDE_SUBC_SUBE (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerFSINCOS (SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
 
static void ReplaceATOMIC_LOAD (SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
 
static MachineBasicBlock * EmitXBegin (MachineInstr *MI, MachineBasicBlock *MBB, const TargetInstrInfo *TII)
 Utility function to emit xbegin specifying the start of an RTM region. More...
 
static MachineBasicBlock * EmitPCMPSTRM (MachineInstr *MI, MachineBasicBlock *BB, const TargetInstrInfo *TII)
 
static MachineBasicBlock * EmitPCMPSTRI (MachineInstr *MI, MachineBasicBlock *BB, const TargetInstrInfo *TII)
 
static MachineBasicBlock * EmitMonitor (MachineInstr *MI, MachineBasicBlock *BB, const TargetInstrInfo *TII, const X86Subtarget *Subtarget)
 
static bool checkAndUpdateEFLAGSKill (MachineBasicBlock::iterator SelectItr, MachineBasicBlock *BB, const TargetRegisterInfo *TRI)
 
static bool isShuffleHigh128VectorInsertLow (ShuffleVectorSDNode *SVOp)
 
static bool isShuffleLow128VectorInsertHigh (ShuffleVectorSDNode *SVOp)
 
static SDValue PerformShuffleCombine256 (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
 PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors. More...
 
static SmallVector< int, 4 > getPSHUFShuffleMask (SDValue N)
 Get the PSHUF-style mask from PSHUF node. More...
 
static bool combineRedundantDWordShuffle (SDValue N, MutableArrayRef< int > Mask, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
 Search for a combinable shuffle across a chain ending in pshufd. More...
 
static bool combineRedundantHalfShuffle (SDValue N, MutableArrayRef< int > Mask, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
 Search for a combinable shuffle across a chain ending in pshuflw or pshufhw. More...
 
static SDValue PerformTargetShuffleCombine (SDValue N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
 Try to combine x86 target specific shuffles. More...
 
static SDValue PerformShuffleCombine (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
 PerformShuffleCombine - Performs several different shuffle combines. More...
 
static SDValue PerformTruncateCombine (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
 
static SDValue XFormVExtractWithShuffleIntoLoad (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
 
static SDValue PerformEXTRACT_VECTOR_ELTCombine (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
 
static std::pair< unsigned, bool > matchIntegerMINMAX (SDValue Cond, EVT VT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const X86Subtarget *Subtarget)
 Matches a VSELECT onto min/max or return 0 if the node doesn't match. More...
 
static SDValue TransformVSELECTtoBlendVECTOR_SHUFFLE (SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget)
 
static SDValue PerformSELECTCombine (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
 
static SDValue checkBoolTestSetCCCombine (SDValue Cmp, X86::CondCode &CC)
 
static SDValue PerformCMOVCombine (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
 Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]. More...
 
static SDValue PerformINTRINSIC_WO_CHAINCombine (SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget)
 
static SDValue PerformMulCombine (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
 
static SDValue PerformSHLCombine (SDNode *N, SelectionDAG &DAG)
 
static SDValue performShiftToAllZeros (SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget)
 Returns a vector of 0s if the node in input is a vector logical shift by a constant amount which is known to be bigger than or equal to the vector element size in bits. More...
 
static SDValue PerformShiftCombine (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
 PerformShiftCombine - Combine shifts. More...
 
static SDValue CMPEQCombine (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
 
static bool CanFoldXORWithAllOnes (const SDNode *N)
 
static SDValue WidenMaskArithmetic (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
 
static SDValue PerformAndCombine (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
 
static SDValue PerformOrCombine (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
 
static SDValue performIntegerAbsCombine (SDNode *N, SelectionDAG &DAG)
 
static SDValue PerformXorCombine (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
 
static SDValue PerformLOADCombine (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
 PerformLOADCombine - Do target-specific dag combines on LOAD nodes. More...
 
static SDValue PerformSTORECombine (SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget)
 PerformSTORECombine - Do target-specific dag combines on STORE nodes. More...
 
static bool isHorizontalBinOp (SDValue &LHS, SDValue &RHS, bool IsCommutative)
 
static SDValue PerformFADDCombine (SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget)
 PerformFADDCombine - Do target-specific dag combines on floating point adds. More...
 
static SDValue PerformFSUBCombine (SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget)
 PerformFSUBCombine - Do target-specific dag combines on floating point subs. More...
 
static SDValue PerformFORCombine (SDNode *N, SelectionDAG &DAG)
 
static SDValue PerformFMinFMaxCombine (SDNode *N, SelectionDAG &DAG)
 
static SDValue PerformFANDCombine (SDNode *N, SelectionDAG &DAG)
 PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes. More...
 
static SDValue PerformFANDNCombine (SDNode *N, SelectionDAG &DAG)
 PerformFANDNCombine - Do target-specific dag combines on X86ISD::FANDN nodes. More...
 
static SDValue PerformBTCombine (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
 
static SDValue PerformVZEXT_MOVLCombine (SDNode *N, SelectionDAG &DAG)
 
static SDValue PerformSIGN_EXTEND_INREGCombine (SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget)
 
static SDValue PerformSExtCombine (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
 
static SDValue PerformFMACombine (SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget)
 
static SDValue PerformZExtCombine (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
 
static SDValue PerformISDSETCCCombine (SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget)
 
static SDValue PerformINSERTPSCombine (SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget)
 
static SDValue MaterializeSETB (SDLoc DL, SDValue EFLAGS, SelectionDAG &DAG, MVT VT)
 
static SDValue PerformSETCCCombine (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
 
static SDValue PerformBrCondCombine (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
 
static SDValue performVectorCompareAndMaskUnaryOpCombine (SDNode *N, SelectionDAG &DAG)
 
static SDValue PerformSINT_TO_FPCombine (SDNode *N, SelectionDAG &DAG, const X86TargetLowering *XTLI)
 
static SDValue PerformADCCombine (SDNode *N, SelectionDAG &DAG, X86TargetLowering::DAGCombinerInfo &DCI)
 
static SDValue OptimizeConditionalInDecrement (SDNode *N, SelectionDAG &DAG)
 
static SDValue PerformAddCombine (SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget)
 PerformADDCombine - Do target-specific dag combines on integer adds. More...
 
static SDValue PerformSubCombine (SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget)
 
static SDValue performVZEXTCombine (SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
 performVZEXTCombine - Performs build vector combines More...
 
static bool clobbersFlagRegisters (const SmallVector< StringRef, 4 > &AsmPieces)
 

Variables

static cl::opt< bool > ExperimentalVectorWideningLegalization ("x86-experimental-vector-widening-legalization", cl::init(false), cl::desc("Enable an experimental vector type legalization through widening ""rather than promotion."), cl::Hidden)
 
static cl::opt< bool > ExperimentalVectorShuffleLowering ("x86-experimental-vector-shuffle-lowering", cl::init(false), cl::desc("Enable an experimental vector shuffle lowering code path."), cl::Hidden)
 
std::map< unsigned, IntrinsicData > IntrMap
 

Macro Definition Documentation

#define DEBUG_TYPE   "x86-isel"

Definition at line 57 of file X86ISelLowering.cpp.

Enumeration Type Documentation

Enumerator
GATHER 
SCATTER 
PREFETCH 
RDSEED 
RDRAND 
RDPMC 
RDTSC 
XTEST 

Definition at line 14385 of file X86ISelLowering.cpp.

CallIsStructReturn - Determines whether a call uses struct return semantics.

Enumerator
NotStructReturn 
RegStructReturn 
StackStructReturn 

Definition at line 2122 of file X86ISelLowering.cpp.

Function Documentation

static bool areAdjacentMasksSequential ( ArrayRef< int >  Mask)
static

Tiny helper function to test whether adjacent masks are sequential.

Definition at line 7897 of file X86ISelLowering.cpp.

7897  {
7898  for (int i = 0, Size = Mask.size(); i < Size; i += 2)
7899  if (Mask[i] + 1 != Mask[i+1])
7900  return false;
7901 
7902  return true;
7903 }
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:109
static StructReturnType argsAreStructReturn ( const SmallVectorImpl< ISD::InputArg > &  Ins)
static

ArgsAreStructReturn - Determines whether a function uses struct return semantics.

Definition at line 2143 of file X86ISelLowering.cpp.

2143  {
2144  if (Ins.empty())
2145  return NotStructReturn;
2146 
2147  const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2148  if (!Flags.isSRet())
2149  return NotStructReturn;
2150  if (Flags.isInReg())
2151  return RegStructReturn;
2152  return StackStructReturn;
2153 }
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Definition: SmallVector.h:57
static bool BUILD_VECTORtoBlendMask ( BuildVectorSDNode BuildVector,
unsigned &  MaskValue 
)
static

Definition at line 9608 of file X86ISelLowering.cpp.

9609  {
9610  MaskValue = 0;
9611  unsigned NumElems = BuildVector->getNumOperands();
9612  // There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
9613  unsigned NumLanes = (NumElems - 1) / 8 + 1;
9614  unsigned NumElemsInLane = NumElems / NumLanes;
9615 
 9616  // Blend for v16i16 should be symmetric for both lanes.
9617  for (unsigned i = 0; i < NumElemsInLane; ++i) {
9618  SDValue EltCond = BuildVector->getOperand(i);
9619  SDValue SndLaneEltCond =
9620  (NumLanes == 2) ? BuildVector->getOperand(i + NumElemsInLane) : EltCond;
9621 
9622  int Lane1Cond = -1, Lane2Cond = -1;
9623  if (isa<ConstantSDNode>(EltCond))
9624  Lane1Cond = !isZero(EltCond);
9625  if (isa<ConstantSDNode>(SndLaneEltCond))
9626  Lane2Cond = !isZero(SndLaneEltCond);
9627 
9628  if (Lane1Cond == Lane2Cond || Lane2Cond < 0)
9629  // Lane1Cond != 0, means we want the first argument.
9630  // Lane1Cond == 0, means we want the second argument.
9631  // The encoding of this argument is 0 for the first argument, 1
9632  // for the second. Therefore, invert the condition.
9633  MaskValue |= !Lane1Cond << i;
9634  else if (Lane1Cond < 0)
9635  MaskValue |= !Lane2Cond << i;
9636  else
9637  return false;
9638  }
9639  return true;
9640 }
static bool isZero(SDValue V)
isZero - Returns true if Elt is a constant integer zero
unsigned getNumOperands() const
const SDValue & getOperand(unsigned Num) const
static SDValue buildFromShuffleMostly ( SDValue  Op,
SelectionDAG DAG 
)
static

Definition at line 5913 of file X86ISelLowering.cpp.

5913  {
5914  MVT VT = Op.getSimpleValueType();
5915 
5916  // Skip if insert_vec_elt is not supported.
5917  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5919  return SDValue();
5920 
5921  SDLoc DL(Op);
5922  unsigned NumElems = Op.getNumOperands();
5923 
5924  SDValue VecIn1;
5925  SDValue VecIn2;
5926  SmallVector<unsigned, 4> InsertIndices;
5927  SmallVector<int, 8> Mask(NumElems, -1);
5928 
5929  for (unsigned i = 0; i != NumElems; ++i) {
5930  unsigned Opc = Op.getOperand(i).getOpcode();
5931 
5932  if (Opc == ISD::UNDEF)
5933  continue;
5934 
5935  if (Opc != ISD::EXTRACT_VECTOR_ELT) {
5936  // Quit if more than 1 elements need inserting.
5937  if (InsertIndices.size() > 1)
5938  return SDValue();
5939 
5940  InsertIndices.push_back(i);
5941  continue;
5942  }
5943 
5944  SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
5945  SDValue ExtIdx = Op.getOperand(i).getOperand(1);
5946  // Quit if non-constant index.
5947  if (!isa<ConstantSDNode>(ExtIdx))
5948  return SDValue();
5949  int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx);
5950 
5951  // Quit if extracted from vector of different type.
5952  if (ExtractedFromVec.getValueType() != VT)
5953  return SDValue();
5954 
5955  if (!VecIn1.getNode())
5956  VecIn1 = ExtractedFromVec;
5957  else if (VecIn1 != ExtractedFromVec) {
5958  if (!VecIn2.getNode())
5959  VecIn2 = ExtractedFromVec;
5960  else if (VecIn2 != ExtractedFromVec)
5961  // Quit if more than 2 vectors to shuffle
5962  return SDValue();
5963  }
5964 
5965  if (ExtractedFromVec == VecIn1)
5966  Mask[i] = Idx;
5967  else if (ExtractedFromVec == VecIn2)
5968  Mask[i] = Idx + NumElems;
5969  }
5970 
5971  if (!VecIn1.getNode())
5972  return SDValue();
5973 
5974  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
5975  SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, &Mask[0]);
5976  for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) {
5977  unsigned Idx = InsertIndices[i];
5978  NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
5979  DAG.getIntPtrConstant(Idx));
5980  }
5981 
5982  return NV;
5983 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
void push_back(const T &Elt)
Definition: SmallVector.h:225
unsigned getNumOperands() const
static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec, SDValue ExtIdx)
For an EXTRACT_VECTOR_ELT with a constant index return the real underlying vector and index...
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
SDNode * getNode() const
get the SDNode which holds the desired result
const SDValue & getOperand(unsigned i) const
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
unsigned getOpcode() const
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
EVT getValueType() const
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
static StructReturnType callIsStructReturn ( const SmallVectorImpl< ISD::OutputArg > &  Outs)
static

Definition at line 2128 of file X86ISelLowering.cpp.

2128  {
2129  if (Outs.empty())
2130  return NotStructReturn;
2131 
2132  const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2133  if (!Flags.isSRet())
2134  return NotStructReturn;
2135  if (Flags.isInReg())
2136  return RegStructReturn;
2137  return StackStructReturn;
2138 }
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Definition: SmallVector.h:57
static bool CanFoldXORWithAllOnes ( const SDNode N)
static

CanFoldXORWithAllOnes - Test whether the XOR operand is a AllOnes vector so it can be folded inside ANDNP.

Definition at line 20479 of file X86ISelLowering.cpp.

20479  {
20480  EVT VT = N->getValueType(0);
20481 
20482  // Match direct AllOnes for 128 and 256-bit vectors
20484  return true;
20485 
20486  // Look through a bit convert.
20487  if (N->getOpcode() == ISD::BITCAST)
20488  N = N->getOperand(0).getNode();
20489 
 20490  // Sometimes the operand may come from an insert_subvector building a 256-bit
20491  // allones vector
20492  if (VT.is256BitVector() &&
20493  N->getOpcode() == ISD::INSERT_SUBVECTOR) {
20494  SDValue V1 = N->getOperand(0);
20495  SDValue V2 = N->getOperand(1);
20496 
20497  if (V1.getOpcode() == ISD::INSERT_SUBVECTOR &&
20498  V1.getOperand(0).getOpcode() == ISD::UNDEF &&
20501  return true;
20502  }
20503 
20504  return false;
20505 }
unsigned getOpcode() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
SDNode * getNode() const
get the SDNode which holds the desired result
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:141
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
bool isBuildVectorAllOnes(const SDNode *N)
Node predicates.
static SDValue ChangeVSETULTtoVSETULE ( SDLoc  dl,
SDValue  Op1,
SelectionDAG DAG 
)
static

Try to turn a VSETULT into a VSETULE by modifying its second operand Op1. If non-trivial (for example because it's not constant) return an empty value.

Definition at line 12143 of file X86ISelLowering.cpp.

12144 {
12146  if (!BV)
12147  return SDValue();
12148 
12149  MVT VT = Op1.getSimpleValueType();
12150  MVT EVT = VT.getVectorElementType();
12151  unsigned n = VT.getVectorNumElements();
12152  SmallVector<SDValue, 8> ULTOp1;
12153 
12154  for (unsigned i = 0; i < n; ++i) {
12156  if (!Elt || Elt->isOpaque() || Elt->getValueType(0) != EVT)
12157  return SDValue();
12158 
12159  // Avoid underflow.
12160  APInt Val = Elt->getAPIntValue();
12161  if (Val == 0)
12162  return SDValue();
12163 
12164  ULTOp1.push_back(DAG.getConstant(Val - 1, EVT));
12165  }
12166 
12167  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, ULTOp1);
12168 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
const APInt & getAPIntValue() const
SDNode * getNode() const
get the SDNode which holds the desired result
unsigned getVectorNumElements() const
Class for arbitrary precision integers.
Definition: APInt.h:75
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:265
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
MVT getVectorElementType() const
static bool checkAndUpdateEFLAGSKill ( MachineBasicBlock::iterator  SelectItr,
MachineBasicBlock BB,
const TargetRegisterInfo TRI 
)
static

Definition at line 17422 of file X86ISelLowering.cpp.

17424  {
17425  // Scan forward through BB for a use/def of EFLAGS.
17426  MachineBasicBlock::iterator miI(std::next(SelectItr));
17427  for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
17428  const MachineInstr& mi = *miI;
17429  if (mi.readsRegister(X86::EFLAGS))
17430  return false;
17431  if (mi.definesRegister(X86::EFLAGS))
17432  break; // Should have kill-flag - update below.
17433  }
17434 
17435  // If we hit the end of the block, check whether EFLAGS is live into a
17436  // successor.
17437  if (miI == BB->end()) {
17438  for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
17439  sEnd = BB->succ_end();
17440  sItr != sEnd; ++sItr) {
17441  MachineBasicBlock* succ = *sItr;
17442  if (succ->isLiveIn(X86::EFLAGS))
17443  return false;
17444  }
17445  }
17446 
17447  // We found a def, or hit the end of the basic block and EFLAGS wasn't live
17448  // out. SelectMI should have a kill flag on EFLAGS.
17449  SelectItr->addRegisterKilled(X86::EFLAGS, TRI);
17450  return true;
17451 }
std::vector< MachineBasicBlock * >::iterator succ_iterator
bool definesRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Definition: MachineInstr.h:813
bool readsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Definition: MachineInstr.h:782
bool isLiveIn(unsigned Reg) const
static SDValue checkBoolTestSetCCCombine ( SDValue  Cmp,
X86::CondCode CC 
)
static

Definition at line 19832 of file X86ISelLowering.cpp.

19832  {
19833  // Quit if not CMP and SUB with its value result used.
19834  if (Cmp.getOpcode() != X86ISD::CMP &&
19835  (Cmp.getOpcode() != X86ISD::SUB || Cmp.getNode()->hasAnyUseOfValue(0)))
19836  return SDValue();
19837 
19838  // Quit if not used as a boolean value.
19839  if (CC != X86::COND_E && CC != X86::COND_NE)
19840  return SDValue();
19841 
19842  // Check CMP operands. One of them should be 0 or 1 and the other should be
19843  // an SetCC or extended from it.
19844  SDValue Op1 = Cmp.getOperand(0);
19845  SDValue Op2 = Cmp.getOperand(1);
19846 
19847  SDValue SetCC;
19848  const ConstantSDNode* C = nullptr;
19849  bool needOppositeCond = (CC == X86::COND_E);
19850  bool checkAgainstTrue = false; // Is it a comparison against 1?
19851 
19852  if ((C = dyn_cast<ConstantSDNode>(Op1)))
19853  SetCC = Op2;
19854  else if ((C = dyn_cast<ConstantSDNode>(Op2)))
19855  SetCC = Op1;
19856  else // Quit if all operands are not constants.
19857  return SDValue();
19858 
19859  if (C->getZExtValue() == 1) {
19860  needOppositeCond = !needOppositeCond;
19861  checkAgainstTrue = true;
19862  } else if (C->getZExtValue() != 0)
 19864  // Quit if the constant is neither 0 nor 1.
19864  return SDValue();
19865 
19866  bool truncatedToBoolWithAnd = false;
19867  // Skip (zext $x), (trunc $x), or (and $x, 1) node.
19868  while (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
19869  SetCC.getOpcode() == ISD::TRUNCATE ||
19870  SetCC.getOpcode() == ISD::AND) {
19871  if (SetCC.getOpcode() == ISD::AND) {
19872  int OpIdx = -1;
19873  ConstantSDNode *CS;
19874  if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(0))) &&
19875  CS->getZExtValue() == 1)
19876  OpIdx = 1;
19877  if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(1))) &&
19878  CS->getZExtValue() == 1)
19879  OpIdx = 0;
19880  if (OpIdx == -1)
19881  break;
19882  SetCC = SetCC.getOperand(OpIdx);
19883  truncatedToBoolWithAnd = true;
19884  } else
19885  SetCC = SetCC.getOperand(0);
19886  }
19887 
19888  switch (SetCC.getOpcode()) {
19889  case X86ISD::SETCC_CARRY:
19890  // Since SETCC_CARRY gives output based on R = CF ? ~0 : 0, it's unsafe to
19891  // simplify it if the result of SETCC_CARRY is not canonicalized to 0 or 1,
19892  // i.e. it's a comparison against true but the result of SETCC_CARRY is not
19893  // truncated to i1 using 'and'.
19894  if (checkAgainstTrue && !truncatedToBoolWithAnd)
19895  break;
19897  "Invalid use of SETCC_CARRY!");
19898  // FALL THROUGH
19899  case X86ISD::SETCC:
19900  // Set the condition code or opposite one if necessary.
19901  CC = X86::CondCode(SetCC.getConstantOperandVal(0));
19902  if (needOppositeCond)
19904  return SetCC.getOperand(1);
19905  case X86ISD::CMOV: {
19906  // Check whether false/true value has canonical one, i.e. 0 or 1.
19907  ConstantSDNode *FVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(0));
19908  ConstantSDNode *TVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(1));
19909  // Quit if true value is not a constant.
19910  if (!TVal)
19911  return SDValue();
19912  // Quit if false value is not a constant.
19913  if (!FVal) {
19914  SDValue Op = SetCC.getOperand(0);
19915  // Skip 'zext' or 'trunc' node.
19916  if (Op.getOpcode() == ISD::ZERO_EXTEND ||
19917  Op.getOpcode() == ISD::TRUNCATE)
19918  Op = Op.getOperand(0);
19919  // A special case for rdrand/rdseed, where 0 is set if false cond is
19920  // found.
19921  if ((Op.getOpcode() != X86ISD::RDRAND &&
19922  Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0)
19923  return SDValue();
19924  }
19925  // Quit if false value is not the constant 0 or 1.
19926  bool FValIsFalse = true;
19927  if (FVal && FVal->getZExtValue() != 0) {
19928  if (FVal->getZExtValue() != 1)
19929  return SDValue();
19930  // If FVal is 1, opposite cond is needed.
19931  needOppositeCond = !needOppositeCond;
19932  FValIsFalse = false;
19933  }
19934  // Quit if TVal is not the constant opposite of FVal.
19935  if (FValIsFalse && TVal->getZExtValue() != 1)
19936  return SDValue();
19937  if (!FValIsFalse && TVal->getZExtValue() != 0)
19938  return SDValue();
19939  CC = X86::CondCode(SetCC.getConstantOperandVal(2));
19940  if (needOppositeCond)
19942  return SetCC.getOperand(3);
19943  }
19944  }
19945 
19946  return SDValue();
19947 }
unsigned getResNo() const
get the index which selects a specific result in the SDNode
SDNode * getNode() const
get the SDNode which holds the desired result
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:362
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:265
static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
uint64_t getConstantOperandVal(unsigned i) const
bool hasAnyUseOfValue(unsigned Value) const
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:368
uint64_t getZExtValue() const
static bool clobbersFlagRegisters ( const SmallVector< StringRef, 4 > &  AsmPieces)
static

Definition at line 22253 of file X86ISelLowering.cpp.

22253  {
22254 
22255  if (AsmPieces.size() == 3 || AsmPieces.size() == 4) {
22256  if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{cc}") &&
22257  std::count(AsmPieces.begin(), AsmPieces.end(), "~{flags}") &&
22258  std::count(AsmPieces.begin(), AsmPieces.end(), "~{fpsr}")) {
22259 
22260  if (AsmPieces.size() == 3)
22261  return true;
22262  else if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{dirflag}"))
22263  return true;
22264  }
22265  }
22266  return false;
22267 }
int count
Definition: spill-01.py:21
static SDValue CMPEQCombine ( SDNode N,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI,
const X86Subtarget Subtarget 
)
static

Definition at line 20379 of file X86ISelLowering.cpp.

20381  {
20382  unsigned opcode;
20383 
20384  // SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but
20385  // we're requiring SSE2 for both.
20386  if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
20387  SDValue N0 = N->getOperand(0);
20388  SDValue N1 = N->getOperand(1);
20389  SDValue CMP0 = N0->getOperand(1);
20390  SDValue CMP1 = N1->getOperand(1);
20391  SDLoc DL(N);
20392 
20393  // The SETCCs should both refer to the same CMP.
20394  if (CMP0.getOpcode() != X86ISD::CMP || CMP0 != CMP1)
20395  return SDValue();
20396 
20397  SDValue CMP00 = CMP0->getOperand(0);
20398  SDValue CMP01 = CMP0->getOperand(1);
20399  EVT VT = CMP00.getValueType();
20400 
20401  if (VT == MVT::f32 || VT == MVT::f64) {
20402  bool ExpectingFlags = false;
20403  // Check for any users that want flags:
20404  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
20405  !ExpectingFlags && UI != UE; ++UI)
20406  switch (UI->getOpcode()) {
20407  default:
20408  case ISD::BR_CC:
20409  case ISD::BRCOND:
20410  case ISD::SELECT:
20411  ExpectingFlags = true;
20412  break;
20413  case ISD::CopyToReg:
20414  case ISD::SIGN_EXTEND:
20415  case ISD::ZERO_EXTEND:
20416  case ISD::ANY_EXTEND:
20417  break;
20418  }
20419 
20420  if (!ExpectingFlags) {
20421  enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0);
20422  enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0);
20423 
20424  if (cc1 == X86::COND_E || cc1 == X86::COND_NE) {
20425  X86::CondCode tmp = cc0;
20426  cc0 = cc1;
20427  cc1 = tmp;
20428  }
20429 
20430  if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
20431  (cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
20432  // FIXME: need symbolic constants for these magic numbers.
20433  // See X86ATTInstPrinter.cpp:printSSECC().
20434  unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
20435  if (Subtarget->hasAVX512()) {
20436  SDValue FSetCC = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CMP00,
20437  CMP01, DAG.getConstant(x86cc, MVT::i8));
20438  if (N->getValueType(0) != MVT::i1)
20439  return DAG.getNode(ISD::ZERO_EXTEND, DL, N->getValueType(0),
20440  FSetCC);
20441  return FSetCC;
20442  }
20443  SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL,
20444  CMP00.getValueType(), CMP00, CMP01,
20445  DAG.getConstant(x86cc, MVT::i8));
20446 
20447  bool is64BitFP = (CMP00.getValueType() == MVT::f64);
20448  MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;
20449 
20450  if (is64BitFP && !Subtarget->is64Bit()) {
20451  // On a 32-bit target, we cannot bitcast the 64-bit float to a
20452  // 64-bit integer, since that's not a legal type. Since
 20453  // OnesOrZeroesF is all ones or all zeroes, we don't need all the
20454  // bits, but can do this little dance to extract the lowest 32 bits
20455  // and work with those going forward.
20456  SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
20457  OnesOrZeroesF);
20458  SDValue Vector32 = DAG.getNode(ISD::BITCAST, DL, MVT::v4f32,
20459  Vector64);
20460  OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32,
20461  Vector32, DAG.getIntPtrConstant(0));
20462  IntVT = MVT::i32;
20463  }
20464 
20465  SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT, OnesOrZeroesF);
20466  SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
20467  DAG.getConstant(1, IntVT));
20468  SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
20469  return OneBitOfTruth;
20470  }
20471  }
20472  }
20473  }
20474  return SDValue();
20475 }
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Definition: X86Subtarget.h:283
bool hasSSE2() const
Definition: X86Subtarget.h:312
static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc)
unsigned getOpcode() const
use_iterator use_begin() const
***NAME is the name of the raw_ostream unsigned & i1
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:362
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:365
uint64_t getConstantOperandVal(unsigned i) const
bool hasAVX512() const
Definition: X86Subtarget.h:319
EVT getValueType() const
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:368
static bool combineRedundantDWordShuffle ( SDValue  N,
MutableArrayRef< int >  Mask,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI 
)
static

Search for a combinable shuffle across a chain ending in pshufd.

We walk up the chain and look for a combinable shuffle, skipping over shuffles that we could hoist this shuffle's transformation past without altering anything.

Definition at line 18496 of file X86ISelLowering.cpp.

18498  {
18499  assert(N.getOpcode() == X86ISD::PSHUFD &&
18500  "Called with something other than an x86 128-bit half shuffle!");
18501  SDLoc DL(N);
18502 
18503  // Walk up a single-use chain looking for a combinable shuffle.
18504  SDValue V = N.getOperand(0);
18505  for (; V.hasOneUse(); V = V.getOperand(0)) {
18506  switch (V.getOpcode()) {
18507  default:
18508  return false; // Nothing combined!
18509 
18510  case ISD::BITCAST:
18511  // Skip bitcasts as we always know the type for the target specific
18512  // instructions.
18513  continue;
18514 
18515  case X86ISD::PSHUFD:
18516  // Found another dword shuffle.
18517  break;
18518 
18519  case X86ISD::PSHUFLW:
18520  // Check that the low words (being shuffled) are the identity in the
18521  // dword shuffle, and the high words are self-contained.
18522  if (Mask[0] != 0 || Mask[1] != 1 ||
18523  !(Mask[2] >= 2 && Mask[2] < 4 && Mask[3] >= 2 && Mask[3] < 4))
18524  return false;
18525 
18526  continue;
18527 
18528  case X86ISD::PSHUFHW:
18529  // Check that the high words (being shuffled) are the identity in the
18530  // dword shuffle, and the low words are self-contained.
18531  if (Mask[2] != 2 || Mask[3] != 3 ||
18532  !(Mask[0] >= 0 && Mask[0] < 2 && Mask[1] >= 0 && Mask[1] < 2))
18533  return false;
18534 
18535  continue;
18536 
18537  case X86ISD::UNPCKL:
18538  case X86ISD::UNPCKH:
18539  // For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword
18540  // shuffle into a preceding word shuffle.
18541  if (V.getValueType() != MVT::v16i8 && V.getValueType() != MVT::v8i16)
18542  return false;
18543 
18544  // Search for a half-shuffle which we can combine with.
18545  unsigned CombineOp =
18547  if (V.getOperand(0) != V.getOperand(1) ||
18548  !V->isOnlyUserOf(V.getOperand(0).getNode()))
18549  return false;
18550  V = V.getOperand(0);
18551  do {
18552  switch (V.getOpcode()) {
18553  default:
18554  return false; // Nothing to combine.
18555 
18556  case X86ISD::PSHUFLW:
18557  case X86ISD::PSHUFHW:
18558  if (V.getOpcode() == CombineOp)
18559  break;
18560 
18561  // Fallthrough!
18562  case ISD::BITCAST:
18563  V = V.getOperand(0);
18564  continue;
18565  }
18566  break;
18567  } while (V.hasOneUse());
18568  break;
18569  }
18570  // Break out of the loop if we break out of the switch.
18571  break;
18572  }
18573 
18574  if (!V.hasOneUse())
18575  // We fell out of the loop without finding a viable combining instruction.
18576  return false;
18577 
18578  // Record the old value to use in RAUW-ing.
18579  SDValue Old = V;
18580 
18581  // Merge this node's mask and our incoming mask.
18583  for (int &M : Mask)
18584  M = VMask[M];
18585  V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0),
18586  getV4X86ShuffleImm8ForMask(Mask, DAG));
18587 
18588  // It is possible that one of the combinable shuffles was completely absorbed
18589  // by the other, just replace it and revisit all users in that case.
18590  if (Old.getNode() == V.getNode()) {
18591  DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo=*/true);
18592  return true;
18593  }
18594 
18595  // Replace N with its operand as we're going to combine that shuffle away.
18596  DAG.ReplaceAllUsesWith(N, N.getOperand(0));
18597 
18598  // Replace the combinable shuffle with the combined one, updating all users
18599  // so that we re-evaluate the chain here.
18600  DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);
18601  return true;
18602 }
bool hasOneUse() const
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
SDValue CombineTo(SDNode *N, const std::vector< SDValue > &To, bool AddTo=true)
SDNode * getNode() const
get the SDNode which holds the desired result
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
static SDValue getV4X86ShuffleImm8ForMask(ArrayRef< int > Mask, SelectionDAG &DAG)
Get a 4-lane 8-bit shuffle immediate for a mask.
unsigned getOpcode() const
static SmallVector< int, 4 > getPSHUFShuffleMask(SDValue N)
Get the PSHUF-style mask from PSHUF node.
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
void ReplaceAllUsesWith(SDValue From, SDValue Op)
EVT getValueType() const
bool isOnlyUserOf(SDNode *N) const
static bool combineRedundantHalfShuffle ( SDValue  N,
MutableArrayRef< int >  Mask,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI 
)
static

Search for a combinable shuffle across a chain ending in pshuflw or pshufhw.

We walk up the chain, skipping shuffles of the other half and looking through shuffles which switch halves trying to find a shuffle of the same pair of dwords.

Definition at line 18609 of file X86ISelLowering.cpp.

18611  {
18612  assert(
18613  (N.getOpcode() == X86ISD::PSHUFLW || N.getOpcode() == X86ISD::PSHUFHW) &&
18614  "Called with something other than an x86 128-bit half shuffle!");
18615  SDLoc DL(N);
18616  unsigned CombineOpcode = N.getOpcode();
18617 
18618  // Walk up a single-use chain looking for a combinable shuffle.
18619  SDValue V = N.getOperand(0);
18620  for (; V.hasOneUse(); V = V.getOperand(0)) {
18621  switch (V.getOpcode()) {
18622  default:
18623  return false; // Nothing combined!
18624 
18625  case ISD::BITCAST:
18626  // Skip bitcasts as we always know the type for the target specific
18627  // instructions.
18628  continue;
18629 
18630  case X86ISD::PSHUFLW:
18631  case X86ISD::PSHUFHW:
18632  if (V.getOpcode() == CombineOpcode)
18633  break;
18634 
18635  // Other-half shuffles are no-ops.
18636  continue;
18637 
18638  case X86ISD::PSHUFD: {
18639  // We can only handle pshufd if the half we are combining either stays in
18640  // its half, or switches to the other half. Bail if one of these isn't
18641  // true.
18643  int DOffset = CombineOpcode == X86ISD::PSHUFLW ? 0 : 2;
18644  if (!((VMask[DOffset + 0] < 2 && VMask[DOffset + 1] < 2) ||
18645  (VMask[DOffset + 0] >= 2 && VMask[DOffset + 1] >= 2)))
18646  return false;
18647 
18648  // Map the mask through the pshufd and keep walking up the chain.
18649  for (int i = 0; i < 4; ++i)
18650  Mask[i] = 2 * (VMask[DOffset + Mask[i] / 2] % 2) + Mask[i] % 2;
18651 
18652  // Switch halves if the pshufd does.
18653  CombineOpcode =
18654  VMask[DOffset + Mask[0] / 2] < 2 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
18655  continue;
18656  }
18657  }
18658  // Break out of the loop if we break out of the switch.
18659  break;
18660  }
18661 
18662  if (!V.hasOneUse())
18663  // We fell out of the loop without finding a viable combining instruction.
18664  return false;
18665 
18666  // Record the old value to use in RAUW-ing.
18667  SDValue Old = V;
18668 
18669  // Merge this node's mask and our incoming mask (adjusted to account for all
18670  // the pshufd instructions encountered).
18672  for (int &M : Mask)
18673  M = VMask[M];
18674  V = DAG.getNode(V.getOpcode(), DL, MVT::v8i16, V.getOperand(0),
18675  getV4X86ShuffleImm8ForMask(Mask, DAG));
18676 
18677  // Replace N with its operand as we're going to combine that shuffle away.
18678  DAG.ReplaceAllUsesWith(N, N.getOperand(0));
18679 
18680  // Replace the combinable shuffle with the combined one, updating all users
18681  // so that we re-evaluate the chain here.
18682  DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);
18683  return true;
18684 }
bool hasOneUse() const
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
SDValue CombineTo(SDNode *N, const std::vector< SDValue > &To, bool AddTo=true)
SDNode * getNode() const
get the SDNode which holds the desired result
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
static SDValue getV4X86ShuffleImm8ForMask(ArrayRef< int > Mask, SelectionDAG &DAG)
Get a 4-lane 8-bit shuffle immediate for a mask.
unsigned getOpcode() const
static SmallVector< int, 4 > getPSHUFShuffleMask(SDValue N)
Get the PSHUF-style mask from PSHUF node.
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
void ReplaceAllUsesWith(SDValue From, SDValue Op)
static void CommuteVectorShuffleMask ( SmallVectorImpl< int > &  Mask,
unsigned  NumElems 
)
static

CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming the two vector operands have swapped position.

Definition at line 3849 of file X86ISelLowering.cpp.

3850  {
3851  for (unsigned i = 0; i != NumElems; ++i) {
3852  int idx = Mask[i];
3853  if (idx < 0)
3854  continue;
3855  else if (idx < (int)NumElems)
3856  Mask[i] = idx + NumElems;
3857  else
3858  Mask[i] = idx - NumElems;
3859  }
3860 }
static SDValue Compact8x32ShuffleNode ( ShuffleVectorSDNode SVOp,
SelectionDAG DAG 
)
static

Definition at line 4038 of file X86ISelLowering.cpp.

4039  {
4040  MVT VT = SVOp->getSimpleValueType(0);
4041  SDLoc dl(SVOp);
4042 
4043  if (VT != MVT::v8i32 && VT != MVT::v8f32)
4044  return SDValue();
4045 
4046  ArrayRef<int> Mask = SVOp->getMask();
4047 
4048  // These are the special masks that may be optimized.
4049  static const int MaskToOptimizeEven[] = {0, 8, 2, 10, 4, 12, 6, 14};
4050  static const int MaskToOptimizeOdd[] = {1, 9, 3, 11, 5, 13, 7, 15};
4051  bool MatchEvenMask = true;
4052  bool MatchOddMask = true;
4053  for (int i=0; i<8; ++i) {
4054  if (!isUndefOrEqual(Mask[i], MaskToOptimizeEven[i]))
4055  MatchEvenMask = false;
4056  if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i]))
4057  MatchOddMask = false;
4058  }
4059 
4060  if (!MatchEvenMask && !MatchOddMask)
4061  return SDValue();
4062 
4063  SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT);
4064 
4065  SDValue Op0 = SVOp->getOperand(0);
4066  SDValue Op1 = SVOp->getOperand(1);
4067 
4068  if (MatchEvenMask) {
4069  // Shift the second operand right to 32 bits.
4070  static const int ShiftRightMask[] = {-1, 0, -1, 2, -1, 4, -1, 6 };
4071  Op1 = DAG.getVectorShuffle(VT, dl, Op1, UndefNode, ShiftRightMask);
4072  } else {
4073  // Shift the first operand left to 32 bits.
4074  static const int ShiftLeftMask[] = {1, -1, 3, -1, 5, -1, 7, -1 };
4075  Op0 = DAG.getVectorShuffle(VT, dl, Op0, UndefNode, ShiftLeftMask);
4076  }
4077  static const int BlendMask[] = {0, 9, 2, 11, 4, 13, 6, 15};
4078  return DAG.getVectorShuffle(VT, dl, Op0, Op1, BlendMask);
4079 }
static bool isUndefOrEqual(int Val, int CmpVal)
const SDValue & getOperand(unsigned Num) const
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
ArrayRef< int > getMask() const
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
MVT getSimpleValueType(unsigned ResNo) const
static SDValue Concat128BitVectors ( SDValue  V1,
SDValue  V2,
EVT  VT,
unsigned  NumElems,
SelectionDAG DAG,
SDLoc  dl 
)
static

Concat two 128-bit vectors into a 256-bit vector using VINSERTF128 instructions. This is used because creating CONCAT_VECTOR nodes of BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower large BUILD_VECTORS.

Definition at line 180 of file X86ISelLowering.cpp.

182  {
183  SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
184  return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
185 }
static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
static SDValue Concat256BitVectors ( SDValue  V1,
SDValue  V2,
EVT  VT,
unsigned  NumElems,
SelectionDAG DAG,
SDLoc  dl 
)
static

Definition at line 187 of file X86ISelLowering.cpp.

189  {
190  SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
191  return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
192 }
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
static SDValue CreateCopyOfByValArgument ( SDValue  Src,
SDValue  Dst,
SDValue  Chain,
ISD::ArgFlagsTy  Flags,
SelectionDAG DAG,
SDLoc  dl 
)
static

CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst" with size and alignment information specified by the specific parameter attribute. The copy will be passed as a byval function parameter.

Definition at line 2160 of file X86ISelLowering.cpp.

2162  {
2163  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
2164 
2165  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2166  /*isVolatile*/false, /*AlwaysInline=*/true,
2168 }
unsigned getByValSize() const
SDValue getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
unsigned getByValAlign() const
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static TargetLoweringObjectFile* createTLOF ( const Triple TT)
static

Definition at line 194 of file X86ISelLowering.cpp.

194  {
195  if (TT.isOSBinFormatMachO()) {
196  if (TT.getArch() == Triple::x86_64)
197  return new X86_64MachoTargetObjectFile();
198  return new TargetLoweringObjectFileMachO();
199  }
200 
201  if (TT.isOSLinux())
202  return new X86LinuxTargetObjectFile();
203  if (TT.isOSBinFormatELF())
204  return new TargetLoweringObjectFileELF();
206  return new X86WindowsTargetObjectFile();
207  if (TT.isOSBinFormatCOFF())
208  return new TargetLoweringObjectFileCOFF();
209  llvm_unreachable("unknown subtarget type");
210 }
bool isOSBinFormatMachO() const
Tests whether the environment is MachO.
Definition: Triple.h:428
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
bool isOSLinux() const
Tests whether the OS is Linux.
Definition: Triple.h:413
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
Definition: Triple.h:213
This implementation is used for Windows targets on x86 and x86-64.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:423
bool isOSBinFormatELF() const
Tests whether the OS uses the ELF binary format.
Definition: Triple.h:418
bool isKnownWindowsMSVCEnvironment() const
Test whether the architecture is 64-bit.
Definition: Triple.h:374
static SDValue EltsFromConsecutiveLoads ( EVT  VT,
SmallVectorImpl< SDValue > &  Elts,
SDLoc DL,
SelectionDAG DAG,
bool  isAfterLegalize 
)
static

EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a vector of type 'VT', see if the elements can be replaced by a single large load which has the same value as a build_vector whose operands are 'elts'.

Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a

FIXME: we'd also like to handle the case where the last elements are zero rather than undef via VZEXT_LOAD, but we do not detect that case today. There's even a handy isZeroNode for that purpose.

Definition at line 5643 of file X86ISelLowering.cpp.

5645  {
5646  EVT EltVT = VT.getVectorElementType();
5647  unsigned NumElems = Elts.size();
5648 
5649  LoadSDNode *LDBase = nullptr;
5650  unsigned LastLoadedElt = -1U;
5651 
5652  // For each element in the initializer, see if we've found a load or an undef.
5653  // If we don't find an initial load element, or later load elements are
5654  // non-consecutive, bail out.
5655  for (unsigned i = 0; i < NumElems; ++i) {
5656  SDValue Elt = Elts[i];
5657 
5658  if (!Elt.getNode() ||
5659  (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
5660  return SDValue();
5661  if (!LDBase) {
5662  if (Elt.getNode()->getOpcode() == ISD::UNDEF)
5663  return SDValue();
5664  LDBase = cast<LoadSDNode>(Elt.getNode());
5665  LastLoadedElt = i;
5666  continue;
5667  }
5668  if (Elt.getOpcode() == ISD::UNDEF)
5669  continue;
5670 
5671  LoadSDNode *LD = cast<LoadSDNode>(Elt);
5672  if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
5673  return SDValue();
5674  LastLoadedElt = i;
5675  }
5676 
5677  // If we have found an entire vector of loads and undefs, then return a large
5678  // load of the entire vector width starting at the base pointer. If we found
5679  // consecutive loads for the low half, generate a vzext_load node.
5680  if (LastLoadedElt == NumElems - 1) {
5681 
5682  if (isAfterLegalize &&
5684  return SDValue();
5685 
5686  SDValue NewLd = SDValue();
5687 
5688  if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
5689  NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
5690  LDBase->getPointerInfo(),
5691  LDBase->isVolatile(), LDBase->isNonTemporal(),
5692  LDBase->isInvariant(), 0);
5693  NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
5694  LDBase->getPointerInfo(),
5695  LDBase->isVolatile(), LDBase->isNonTemporal(),
5696  LDBase->isInvariant(), LDBase->getAlignment());
5697 
5698  if (LDBase->hasAnyUseOfValue(1)) {
5699  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
5700  SDValue(LDBase, 1),
5701  SDValue(NewLd.getNode(), 1));
5702  DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
5703  DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
5704  SDValue(NewLd.getNode(), 1));
5705  }
5706 
5707  return NewLd;
5708  }
5709  if (NumElems == 4 && LastLoadedElt == 1 &&
5710  DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) {
5711  SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
5712  SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
5713  SDValue ResNode =
5714  DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, MVT::i64,
5715  LDBase->getPointerInfo(),
5716  LDBase->getAlignment(),
5717  false/*isVolatile*/, true/*ReadMem*/,
5718  false/*WriteMem*/);
5719 
5720  // Make sure the newly-created LOAD is in the same position as LDBase in
5721  // terms of dependency. We create a TokenFactor for LDBase and ResNode, and
5722  // update uses of LDBase's output chain to use the TokenFactor.
5723  if (LDBase->hasAnyUseOfValue(1)) {
5724  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
5725  SDValue(LDBase, 1), SDValue(ResNode.getNode(), 1));
5726  DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
5727  DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
5728  SDValue(ResNode.getNode(), 1));
5729  }
5730 
5731  return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
5732  }
5733  return SDValue();
5734 }
unsigned InferPtrAlignment(SDValue Ptr) const
unsigned getOpcode() const
const SDValue & getBasePtr() const
bool isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
SDVTList getVTList(EVT VT)
EVT getVectorElementType() const
Definition: ValueTypes.h:217
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
SDNode * getNode() const
get the SDNode which holds the desired result
bool isTypeLegal(EVT VT) const
bool isNonTemporal() const
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned getOpcode() const
bool isVolatile() const
const MachinePointerInfo & getPointerInfo() const
bool isInvariant() const
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
const SDValue & getChain() const
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo=nullptr, const MDNode *Ranges=nullptr)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
bool hasAnyUseOfValue(unsigned Value) const
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
bool isNON_EXTLoad(const SDNode *N)
SDValue getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, bool Vol=false, bool ReadMem=true, bool WriteMem=true)
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
unsigned getAlignment() const
static MachineBasicBlock* EmitMonitor ( MachineInstr MI,
MachineBasicBlock BB,
const TargetInstrInfo TII,
const X86Subtarget Subtarget 
)
static

Definition at line 17059 of file X86ISelLowering.cpp.

17061  {
17062  DebugLoc dl = MI->getDebugLoc();
17063 
17064  // Address into RAX/EAX, other two args into ECX, EDX.
17065  unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
17066  unsigned MemReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
17067  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
17068  for (int i = 0; i < X86::AddrNumOperands; ++i)
17069  MIB.addOperand(MI->getOperand(i));
17070 
17071  unsigned ValOps = X86::AddrNumOperands;
17072  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
17073  .addReg(MI->getOperand(ValOps).getReg());
17074  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX)
17075  .addReg(MI->getOperand(ValOps+1).getReg());
17076 
17077  // The instruction doesn't actually take any operands though.
17078  BuildMI(*BB, MI, dl, TII->get(X86::MONITORrrr));
17079 
17080  MI->eraseFromParent(); // The pseudo is gone now.
17081  return BB;
17082 }
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Definition: X86Subtarget.h:283
AddrNumOperands - Total number of operands in a memory reference.
Definition: X86BaseInfo.h:42
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:276
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
const MCInstrDesc & get(unsigned Opcode) const
Definition: MCInstrInfo.h:48
unsigned getReg() const
getReg - Returns the register number.
const MachineInstrBuilder & addOperand(const MachineOperand &MO) const
DebugLoc getDebugLoc() const
Definition: MachineInstr.h:245
static MachineBasicBlock* EmitPCMPSTRI ( MachineInstr MI,
MachineBasicBlock BB,
const TargetInstrInfo TII 
)
static

Definition at line 17024 of file X86ISelLowering.cpp.

17025  {
17026  unsigned Opc;
17027  switch (MI->getOpcode()) {
17028  default: llvm_unreachable("illegal opcode!");
17029  case X86::PCMPISTRIREG: Opc = X86::PCMPISTRIrr; break;
17030  case X86::VPCMPISTRIREG: Opc = X86::VPCMPISTRIrr; break;
17031  case X86::PCMPISTRIMEM: Opc = X86::PCMPISTRIrm; break;
17032  case X86::VPCMPISTRIMEM: Opc = X86::VPCMPISTRIrm; break;
17033  case X86::PCMPESTRIREG: Opc = X86::PCMPESTRIrr; break;
17034  case X86::VPCMPESTRIREG: Opc = X86::VPCMPESTRIrr; break;
17035  case X86::PCMPESTRIMEM: Opc = X86::PCMPESTRIrm; break;
17036  case X86::VPCMPESTRIMEM: Opc = X86::VPCMPESTRIrm; break;
17037  }
17038 
17039  DebugLoc dl = MI->getDebugLoc();
17040  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
17041 
17042  unsigned NumArgs = MI->getNumOperands(); // remove the results
17043  for (unsigned i = 1; i < NumArgs; ++i) {
17044  MachineOperand &Op = MI->getOperand(i);
17045  if (!(Op.isReg() && Op.isImplicit()))
17046  MIB.addOperand(Op);
17047  }
17048  if (MI->hasOneMemOperand())
17049  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
17050 
17051  BuildMI(*BB, MI, dl,
17052  TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
17053  .addReg(X86::ECX);
17054 
17055  MI->eraseFromParent();
17056  return BB;
17057 }
bool isImplicit() const
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
bool isReg() const
isReg - Tests if this is a MO_Register operand.
unsigned getNumOperands() const
Definition: MachineInstr.h:274
int getOpcode() const
Definition: MachineInstr.h:270
mmo_iterator memoperands_end() const
Definition: MachineInstr.h:340
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:276
bool hasOneMemOperand() const
Definition: MachineInstr.h:352
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
const MCInstrDesc & get(unsigned Opcode) const
Definition: MCInstrInfo.h:48
unsigned getReg() const
getReg - Returns the register number.
const MachineInstrBuilder & addOperand(const MachineOperand &MO) const
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd)
DebugLoc getDebugLoc() const
Definition: MachineInstr.h:245
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:339
static MachineBasicBlock* EmitPCMPSTRM ( MachineInstr MI,
MachineBasicBlock BB,
const TargetInstrInfo TII 
)
static

Definition at line 16987 of file X86ISelLowering.cpp.

16988  {
16989  unsigned Opc;
16990  switch (MI->getOpcode()) {
16991  default: llvm_unreachable("illegal opcode!");
16992  case X86::PCMPISTRM128REG: Opc = X86::PCMPISTRM128rr; break;
16993  case X86::VPCMPISTRM128REG: Opc = X86::VPCMPISTRM128rr; break;
16994  case X86::PCMPISTRM128MEM: Opc = X86::PCMPISTRM128rm; break;
16995  case X86::VPCMPISTRM128MEM: Opc = X86::VPCMPISTRM128rm; break;
16996  case X86::PCMPESTRM128REG: Opc = X86::PCMPESTRM128rr; break;
16997  case X86::VPCMPESTRM128REG: Opc = X86::VPCMPESTRM128rr; break;
16998  case X86::PCMPESTRM128MEM: Opc = X86::PCMPESTRM128rm; break;
16999  case X86::VPCMPESTRM128MEM: Opc = X86::VPCMPESTRM128rm; break;
17000  }
17001 
17002  DebugLoc dl = MI->getDebugLoc();
17003  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
17004 
17005  unsigned NumArgs = MI->getNumOperands();
17006  for (unsigned i = 1; i < NumArgs; ++i) {
17007  MachineOperand &Op = MI->getOperand(i);
17008  if (!(Op.isReg() && Op.isImplicit()))
17009  MIB.addOperand(Op);
17010  }
17011  if (MI->hasOneMemOperand())
17012  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
17013 
17014  BuildMI(*BB, MI, dl,
17015  TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
17016  .addReg(X86::XMM0);
17017 
17018  MI->eraseFromParent();
17019  return BB;
17020 }
bool isImplicit() const
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
bool isReg() const
isReg - Tests if this is a MO_Register operand.
unsigned getNumOperands() const
Definition: MachineInstr.h:274
int getOpcode() const
Definition: MachineInstr.h:270
mmo_iterator memoperands_end() const
Definition: MachineInstr.h:340
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:276
bool hasOneMemOperand() const
Definition: MachineInstr.h:352
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
const MCInstrDesc & get(unsigned Opcode) const
Definition: MCInstrInfo.h:48
unsigned getReg() const
getReg - Returns the register number.
const MachineInstrBuilder & addOperand(const MachineOperand &MO) const
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd)
DebugLoc getDebugLoc() const
Definition: MachineInstr.h:245
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:339
static SDValue EmitTailCallStoreRetAddr ( SelectionDAG DAG,
MachineFunction MF,
SDValue  Chain,
SDValue  RetAddrFrIdx,
EVT  PtrVT,
unsigned  SlotSize,
int  FPDiff,
SDLoc  dl 
)
static

EmitTailCallStoreRetAddr - Emit a store of the return address if tail call optimization is performed and it is required (FPDiff!=0).

Definition at line 2566 of file X86ISelLowering.cpp.

2569  {
2570  // Store the return address to the appropriate stack slot.
2571  if (!FPDiff) return Chain;
2572  // Calculate the new stack slot for the return address.
2573  int NewReturnAddrFI =
2574  MF.getFrameInfo()->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
2575  false);
2576  SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
2577  Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
2578  MachinePointerInfo::getFixedStack(NewReturnAddrFI),
2579  false, false, 0);
2580  return Chain;
2581 }
SDValue getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo=nullptr)
MachineFrameInfo * getFrameInfo()
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable)
static MachineBasicBlock* EmitXBegin ( MachineInstr MI,
MachineBasicBlock MBB,
const TargetInstrInfo TII 
)
static

Utility function to emit xbegin specifying the start of an RTM region.

Definition at line 16929 of file X86ISelLowering.cpp.

16930  {
16931  DebugLoc DL = MI->getDebugLoc();
16932 
16933  const BasicBlock *BB = MBB->getBasicBlock();
16935  ++I;
16936 
16937  // For the v = xbegin(), we generate
16938  //
16939  // thisMBB:
16940  // xbegin sinkMBB
16941  //
16942  // mainMBB:
16943  // eax = -1
16944  //
16945  // sinkMBB:
16946  // v = eax
16947 
16948  MachineBasicBlock *thisMBB = MBB;
16949  MachineFunction *MF = MBB->getParent();
16950  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
16951  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
16952  MF->insert(I, mainMBB);
16953  MF->insert(I, sinkMBB);
16954 
16955  // Transfer the remainder of BB and its successor edges to sinkMBB.
16956  sinkMBB->splice(sinkMBB->begin(), MBB,
16958  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
16959 
16960  // thisMBB:
16961  // xbegin sinkMBB
16962  // # fallthrough to mainMBB
16963  // # abortion to sinkMBB
16964  BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(sinkMBB);
16965  thisMBB->addSuccessor(mainMBB);
16966  thisMBB->addSuccessor(sinkMBB);
16967 
16968  // mainMBB:
16969  // EAX = -1
16970  BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), X86::EAX).addImm(-1);
16971  mainMBB->addSuccessor(sinkMBB);
16972 
16973  // sinkMBB:
16974  // EAX is live into the sinkMBB
16975  sinkMBB->addLiveIn(X86::EAX);
16976  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
16977  TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
16978  .addReg(X86::EAX);
16979 
16980  MI->eraseFromParent();
16981  return sinkMBB;
16982 }
const MachineFunction * getParent() const
void addLiveIn(unsigned Reg)
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB)
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
const BasicBlock * getBasicBlock() const
LLVM Basic Block Representation.
Definition: BasicBlock.h:72
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:276
MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID)
const MCInstrDesc & get(unsigned Opcode) const
Definition: MCInstrInfo.h:48
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
#define I(x, y, z)
Definition: MD5.cpp:54
unsigned getReg() const
getReg - Returns the register number.
void insert(iterator MBBI, MachineBasicBlock *MBB)
BasicBlockListType::iterator iterator
void addSuccessor(MachineBasicBlock *succ, uint32_t weight=0)
DebugLoc getDebugLoc() const
Definition: MachineInstr.h:245
static SDValue ExpandHorizontalBinOp ( const SDValue V0,
const SDValue V1,
SDLoc  DL,
SelectionDAG DAG,
unsigned  X86Opcode,
bool  Mode,
bool  isUndefLO,
bool  isUndefHI 
)
static

Emit a sequence of two 128-bit horizontal add/sub followed by a concat_vector.

This is a helper function of PerformBUILD_VECTORCombine. This function expects two 256-bit vectors called V0 and V1. At first, each vector is split into two separate 128-bit vectors. Then, the resulting 128-bit vectors are used to implement two horizontal binary operations.

The kind of horizontal binary operation is defined by X86Opcode.

Mode specifies how the 128-bit parts of V0 and V1 are passed in input to the two new horizontal binop. When Mode is set, the first horizontal binop dag node would take as input the lower 128-bit of V0 and the upper 128-bit of V0. The second horizontal binop dag node would take as input the lower 128-bit of V1 and the upper 128-bit of V1. Example: HADD V0_LO, V0_HI HADD V1_LO, V1_HI

Otherwise, the first horizontal binop dag node takes as input the lower 128-bit of V0 and the lower 128-bit of V1, and the second horizontal binop dag node takes the upper 128-bit of V0 and the upper 128-bit of V1. Example: HADD V0_LO, V1_LO HADD V0_HI, V1_HI

If isUndefLO is set, then the algorithm propagates UNDEF to the lower 128-bits of the result. If isUndefHI is set, then UNDEF is propagated to the upper 128-bits of the result.

Definition at line 6187 of file X86ISelLowering.cpp.

6190  {
6191  EVT VT = V0.getValueType();
6192  assert(VT.is256BitVector() && VT == V1.getValueType() &&
6193  "Invalid nodes in input!");
6194 
6195  unsigned NumElts = VT.getVectorNumElements();
6196  SDValue V0_LO = Extract128BitVector(V0, 0, DAG, DL);
6197  SDValue V0_HI = Extract128BitVector(V0, NumElts/2, DAG, DL);
6198  SDValue V1_LO = Extract128BitVector(V1, 0, DAG, DL);
6199  SDValue V1_HI = Extract128BitVector(V1, NumElts/2, DAG, DL);
6200  EVT NewVT = V0_LO.getValueType();
6201 
6202  SDValue LO = DAG.getUNDEF(NewVT);
6203  SDValue HI = DAG.getUNDEF(NewVT);
6204 
6205  if (Mode) {
6206  // Don't emit a horizontal binop if the result is expected to be UNDEF.
6207  if (!isUndefLO && V0->getOpcode() != ISD::UNDEF)
6208  LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI);
6209  if (!isUndefHI && V1->getOpcode() != ISD::UNDEF)
6210  HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI);
6211  } else {
6212  // Don't emit a horizontal binop if the result is expected to be UNDEF.
6213  if (!isUndefLO && (V0_LO->getOpcode() != ISD::UNDEF ||
6214  V1_LO->getOpcode() != ISD::UNDEF))
6215  LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO);
6216 
6217  if (!isUndefHI && (V0_HI->getOpcode() != ISD::UNDEF ||
6218  V1_HI->getOpcode() != ISD::UNDEF))
6219  HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI);
6220  }
6221 
6222  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
6223 }
unsigned getOpcode() const
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
assert(Globals.size() > 1)
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:141
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
EVT getValueType() const
unsigned getVectorNumElements() const
Definition: ValueTypes.h:226
static SDValue Extract128BitVector ( SDValue  Vec,
unsigned  IdxVal,
SelectionDAG DAG,
SDLoc  dl 
)
static

Generate a DAG to grab 128-bits from a vector > 128 bits. This sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128 or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4 instructions or a simple subregister reference. Idx is an index in the 128 bits we want. It need not be aligned to a 128-bit boundary. That makes lowering EXTRACT_VECTOR_ELT operations easier.

Definition at line 118 of file X86ISelLowering.cpp.

119  {
121  Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
122  return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
123 }
static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl, unsigned vectorWidth)
assert(Globals.size() > 1)
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:141
EVT getValueType() const
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
Definition: ValueTypes.h:146
static SDValue Extract256BitVector ( SDValue  Vec,
unsigned  IdxVal,
SelectionDAG DAG,
SDLoc  dl 
)
static

Generate a DAG to grab 256-bits from a 512-bit vector.

Definition at line 126 of file X86ISelLowering.cpp.

127  {
128  assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
129  return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
130 }
static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl, unsigned vectorWidth)
assert(Globals.size() > 1)
EVT getValueType() const
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
Definition: ValueTypes.h:146
static SDValue ExtractSubVector ( SDValue  Vec,
unsigned  IdxVal,
SelectionDAG DAG,
SDLoc  dl,
unsigned  vectorWidth 
)
static

Definition at line 76 of file X86ISelLowering.cpp.

78  {
79  assert((vectorWidth == 128 || vectorWidth == 256) &&
80  "Unsupported vector width");
81  EVT VT = Vec.getValueType();
82  EVT ElVT = VT.getVectorElementType();
83  unsigned Factor = VT.getSizeInBits()/vectorWidth;
84  EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
85  VT.getVectorNumElements()/Factor);
86 
87  // Extract from UNDEF is UNDEF.
88  if (Vec.getOpcode() == ISD::UNDEF)
89  return DAG.getUNDEF(ResultVT);
90 
91  // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
92  unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
93 
94  // This is the index of the first element of the vectorWidth-bit chunk
95  // we want.
96  unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
97  * ElemsPerChunk);
98 
99  // If the input is a buildvector just emit a smaller one.
100  if (Vec.getOpcode() == ISD::BUILD_VECTOR)
101  return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
102  makeArrayRef(Vec->op_begin()+NormalizedIdxVal,
103  ElemsPerChunk));
104 
105  SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
106  SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
107  VecIdx);
108 
109  return Result;
110 
111 }
LLVMContext * getContext() const
Definition: SelectionDAG.h:280
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:276
EVT getVectorElementType() const
Definition: ValueTypes.h:217
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
assert(Globals.size() > 1)
unsigned getOpcode() const
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
op_iterator op_begin() const
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
EVT getValueType() const
unsigned getVectorNumElements() const
Definition: ValueTypes.h:226
static bool FuncIsMadeTailCallSafe ( CallingConv::ID  CC,
bool  GuaranteedTailCallOpt 
)
static

FuncIsMadeTailCallSafe - Return true if the function is being made into a tailcall target by changing its ABI.

Definition at line 2197 of file X86ISelLowering.cpp.

2198  {
2199  return GuaranteedTailCallOpt && IsTailCallConvention(CC);
2200 }
static bool IsTailCallConvention(CallingConv::ID CC)
static unsigned getExtractVEXTRACTImmediate ( SDNode N,
unsigned  vecWidth 
)
static

Definition at line 4696 of file X86ISelLowering.cpp.

4696  {
4697  assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
4698  if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
4699  llvm_unreachable("Illegal extract subvector for VEXTRACT");
4700 
4701  uint64_t Index =
4702  cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
4703 
4704  MVT VecVT = N->getOperand(0).getSimpleValueType();
4705  MVT ElVT = VecVT.getVectorElementType();
4706 
4707  unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
4708  return Index / NumElemsPerChunk;
4709 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getSizeInBits() const
const SDValue & getOperand(unsigned Num) const
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
SDNode * getNode() const
get the SDNode which holds the desired result
assert(Globals.size() > 1)
MVT getVectorElementType() const
static SDValue getGatherNode ( unsigned  Opc,
SDValue  Op,
SelectionDAG DAG,
SDValue  Src,
SDValue  Mask,
SDValue  Base,
SDValue  Index,
SDValue  ScaleOp,
SDValue  Chain,
const X86Subtarget Subtarget 
)
static

Definition at line 14204 of file X86ISelLowering.cpp.

14207  {
14208  SDLoc dl(Op);
14209  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
14210  assert(C && "Invalid scale type");
14211  SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
14212  EVT MaskVT = MVT::getVectorVT(MVT::i1,
14214  SDValue MaskInReg;
14215  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
14216  if (MaskC)
14217  MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), MaskVT);
14218  else
14219  MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
14220  SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
14221  SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
14222  SDValue Segment = DAG.getRegister(0, MVT::i32);
14223  if (Src.getOpcode() == ISD::UNDEF)
14224  Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl);
14225  SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
14226  SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
14227  SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
14228  return DAG.getMergeValues(RetOps, dl);
14229 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG, SDLoc dl)
SDVTList getVTList(EVT VT)
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
unsigned getOpcode() const
***NAME is the name of the raw_ostream unsigned & i1
SDValue getTargetConstant(uint64_t Val, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:406
int64_t getSExtValue() const
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:265
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
EVT getValueType() const
SDValue getRegister(unsigned Reg, EVT VT)
uint64_t getZExtValue() const
static SDValue getINSERTPS ( ShuffleVectorSDNode SVOp,
SDLoc dl,
SelectionDAG DAG 
)
static

Definition at line 9028 of file X86ISelLowering.cpp.

9029  {
9030  // Generate an insertps instruction when inserting an f32 from memory onto a
9031  // v4f32 or when copying a member from one v4f32 to another.
9032  // We also use it for transferring i32 from one register to another,
9033  // since it simply copies the same bits.
9034  // If we're transferring an i32 from memory to a specific element in a
9035  // register, we output a generic DAG that will match the PINSRD
9036  // instruction.
9037  MVT VT = SVOp->getSimpleValueType(0);
9038  MVT EVT = VT.getVectorElementType();
9039  SDValue V1 = SVOp->getOperand(0);
9040  SDValue V2 = SVOp->getOperand(1);
9041  auto Mask = SVOp->getMask();
9042  assert((VT == MVT::v4f32 || VT == MVT::v4i32) &&
9043  "unsupported vector type for insertps/pinsrd");
9044 
9045  auto FromV1Predicate = [](const int &i) { return i < 4 && i > -1; };
9046  auto FromV2Predicate = [](const int &i) { return i >= 4; };
9047  int FromV1 = std::count_if(Mask.begin(), Mask.end(), FromV1Predicate);
9048 
9049  SDValue From;
9050  SDValue To;
9051  unsigned DestIndex;
9052  if (FromV1 == 1) {
9053  From = V1;
9054  To = V2;
9055  DestIndex = std::find_if(Mask.begin(), Mask.end(), FromV1Predicate) -
9056  Mask.begin();
9057 
9058  // If we have 1 element from each vector, we have to check if we're
9059  // changing V1's element's place. If so, we're done. Otherwise, we
9060  // should assume we're changing V2's element's place and behave
9061  // accordingly.
9062  int FromV2 = std::count_if(Mask.begin(), Mask.end(), FromV2Predicate);
9063  if (FromV1 == FromV2 && DestIndex == Mask[DestIndex] % 4) {
9064  From = V2;
9065  To = V1;
9066  DestIndex =
9067  std::find_if(Mask.begin(), Mask.end(), FromV2Predicate) - Mask.begin();
9068  }
9069  } else {
9070  assert(std::count_if(Mask.begin(), Mask.end(), FromV2Predicate) == 1 &&
9071  "More than one element from V1 and from V2, or no elements from one "
9072  "of the vectors. This case should not have returned true from "
9073  "isINSERTPSMask");
9074  From = V2;
9075  To = V1;
9076  DestIndex =
9077  std::find_if(Mask.begin(), Mask.end(), FromV2Predicate) - Mask.begin();
9078  }
9079 
9080  // Get an index into the source vector in the range [0,4) (the mask is
9081  // in the range [0,8) because it can address V1 and V2)
9082  unsigned SrcIndex = Mask[DestIndex] % 4;
9083  if (MayFoldLoad(From)) {
9084  // Trivial case, when From comes from a load and is only used by the
9085  // shuffle. Make it use insertps from the vector that we need from that
9086  // load.
9087  SDValue NewLoad =
9088  NarrowVectorLoadToElement(cast<LoadSDNode>(From), SrcIndex, DAG);
9089  if (!NewLoad.getNode())
9090  return SDValue();
9091 
9092  if (EVT == MVT::f32) {
9093  // Create this as a scalar to vector to match the instruction pattern.
9094  SDValue LoadScalarToVector =
9095  DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, NewLoad);
9096  SDValue InsertpsMask = DAG.getIntPtrConstant(DestIndex << 4);
9097  return DAG.getNode(X86ISD::INSERTPS, dl, VT, To, LoadScalarToVector,
9098  InsertpsMask);
9099  } else { // EVT == MVT::i32
9100  // If we're getting an i32 from memory, use an INSERT_VECTOR_ELT
9101  // instruction, to match the PINSRD instruction, which loads an i32 to a
9102  // certain vector element.
9103  return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, To, NewLoad,
9104  DAG.getConstant(DestIndex, MVT::i32));
9105  }
9106  }
9107 
9108  // Vector-element-to-vector
9109  SDValue InsertpsMask = DAG.getIntPtrConstant(DestIndex << 4 | SrcIndex << 6);
9110  return DAG.getNode(X86ISD::INSERTPS, dl, VT, To, From, InsertpsMask);
9111 }
static SDValue NarrowVectorLoadToElement(LoadSDNode *Load, unsigned Index, SelectionDAG &DAG)
const SDValue & getOperand(unsigned Num) const
static bool MayFoldLoad(SDValue Op)
SDNode * getNode() const
get the SDNode which holds the desired result
assert(Globals.size() > 1)
ArrayRef< int > getMask() const
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
MVT getVectorElementType() const
MVT getSimpleValueType(unsigned ResNo) const
static unsigned getInsertVINSERTImmediate ( SDNode N,
unsigned  vecWidth 
)
static

Definition at line 4711 of file X86ISelLowering.cpp.

4711  {
4712  assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
4713  if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
4714  llvm_unreachable("Illegal insert subvector for VINSERT");
4715 
4716  uint64_t Index =
4717  cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
4718 
4719  MVT VecVT = N->getSimpleValueType(0);
4720  MVT ElVT = VecVT.getVectorElementType();
4721 
4722  unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
4723  return Index / NumElemsPerChunk;
4724 }
unsigned getSizeInBits() const
const SDValue & getOperand(unsigned Num) const
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
SDNode * getNode() const
get the SDNode which holds the desired result
assert(Globals.size() > 1)
MVT getVectorElementType() const
MVT getSimpleValueType(unsigned ResNo) const
static SDValue getLegalSplat ( SelectionDAG DAG,
SDValue  V,
int  EltNo 
)
static

getLegalSplat - Generate a legal splat with supported x86 shuffles

Definition at line 5022 of file X86ISelLowering.cpp.

5022  {
5023  MVT VT = V.getSimpleValueType();
5024  SDLoc dl(V);
5025 
5026  if (VT.is128BitVector()) {
5027  V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V);
5028  int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
5029  V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32),
5030  &SplatMask[0]);
5031  } else if (VT.is256BitVector()) {
5032  // To use VPERMILPS to splat scalars, the second half of indicies must
5033  // refer to the higher part, which is a duplication of the lower one,
5034  // because VPERMILPS can only handle in-lane permutations.
5035  int SplatMask[8] = { EltNo, EltNo, EltNo, EltNo,
5036  EltNo+4, EltNo+4, EltNo+4, EltNo+4 };
5037 
5038  V = DAG.getNode(ISD::BITCAST, dl, MVT::v8f32, V);
5039  V = DAG.getVectorShuffle(MVT::v8f32, dl, V, DAG.getUNDEF(MVT::v8f32),
5040  &SplatMask[0]);
5041  } else
5042  llvm_unreachable("Vector size not supported");
5043 
5044  return DAG.getNode(ISD::BITCAST, dl, VT, V);
5045 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
static void getMaxByValAlign ( Type Ty,
unsigned &  MaxAlign 
)
static

getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment.

Definition at line 1646 of file X86ISelLowering.cpp.

1646  {
1647  if (MaxAlign == 16)
1648  return;
1649  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1650  if (VTy->getBitWidth() == 128)
1651  MaxAlign = 16;
1652  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1653  unsigned EltAlign = 0;
1654  getMaxByValAlign(ATy->getElementType(), EltAlign);
1655  if (EltAlign > MaxAlign)
1656  MaxAlign = EltAlign;
1657  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1658  for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
1659  unsigned EltAlign = 0;
1660  getMaxByValAlign(STy->getElementType(i), EltAlign);
1661  if (EltAlign > MaxAlign)
1662  MaxAlign = EltAlign;
1663  if (MaxAlign == 16)
1664  break;
1665  }
1666  }
1667 }
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign)
static SDValue getMOVDDup ( SDValue Op,
SDLoc dl,
SDValue  V1,
SelectionDAG DAG 
)
static

Definition at line 8907 of file X86ISelLowering.cpp.

8907  {
8908  MVT VT = Op.getSimpleValueType();
8909 
8910  // Canonizalize to v2f64.
8911  V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
8912  return DAG.getNode(ISD::BITCAST, dl, VT,
8913  getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64,
8914  V1, DAG));
8915 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, SDValue V1, SelectionDAG &DAG)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static SDValue getMOVHighToLow ( SDValue Op,
SDLoc dl,
SelectionDAG DAG 
)
static

Definition at line 8937 of file X86ISelLowering.cpp.

8937  {
8938  SDValue V1 = Op.getOperand(0);
8939  SDValue V2 = Op.getOperand(1);
8940  MVT VT = Op.getSimpleValueType();
8941 
8942  assert((VT == MVT::v4i32 || VT == MVT::v4f32) &&
8943  "unsupported shuffle type");
8944 
8945  if (V2.getOpcode() == ISD::UNDEF)
8946  V2 = V1;
8947 
8948  // v4i32 or v4f32
8949  return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
8950 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, SDValue V1, SelectionDAG &DAG)
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
static SDValue getMOVL ( SelectionDAG DAG,
SDLoc  dl,
EVT  VT,
SDValue  V1,
SDValue  V2 
)
static

getMOVLMask - Returns a vector_shuffle mask for a movs{s|d}, movd operation of specified width.

Definition at line 4966 of file X86ISelLowering.cpp.

4967  {
4968  unsigned NumElems = VT.getVectorNumElements();
4969  SmallVector<int, 8> Mask;
4970  Mask.push_back(NumElems);
4971  for (unsigned i = 1; i != NumElems; ++i)
4972  Mask.push_back(i);
4973  return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
4974 }
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
unsigned getVectorNumElements() const
Definition: ValueTypes.h:226
static SDValue getMOVLowToHigh ( SDValue Op,
SDLoc dl,
SelectionDAG DAG,
bool  HasSSE2 
)
static

Definition at line 8918 of file X86ISelLowering.cpp.

8919  {
8920  SDValue V1 = Op.getOperand(0);
8921  SDValue V2 = Op.getOperand(1);
8922  MVT VT = Op.getSimpleValueType();
8923 
8924  assert(VT != MVT::v2i64 && "unsupported shuffle type");
8925 
8926  if (HasSSE2 && VT == MVT::v2f64)
8927  return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
8928 
8929  // v4f32 or v4i32: canonizalized to v4f32 (which is legal for SSE1)
8930  return DAG.getNode(ISD::BITCAST, dl, VT,
8931  getTargetShuffleNode(X86ISD::MOVLHPS, dl, MVT::v4f32,
8932  DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V1),
8933  DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V2), DAG));
8934 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, SDValue V1, SelectionDAG &DAG)
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static SDValue getMOVLP ( SDValue Op,
SDLoc dl,
SelectionDAG DAG,
bool  HasSSE2 
)
static

Definition at line 8953 of file X86ISelLowering.cpp.

8953  {
8954  SDValue V1 = Op.getOperand(0);
8955  SDValue V2 = Op.getOperand(1);
8956  MVT VT = Op.getSimpleValueType();
8957  unsigned NumElems = VT.getVectorNumElements();
8958 
8959  // Use MOVLPS and MOVLPD in case V1 or V2 are loads. During isel, the second
8960  // operand of these instructions is only memory, so check if there's a
8961  // potencial load folding here, otherwise use SHUFPS or MOVSD to match the
8962  // same masks.
8963  bool CanFoldLoad = false;
8964 
8965  // Trivial case, when V2 comes from a load.
8966  if (MayFoldVectorLoad(V2))
8967  CanFoldLoad = true;
8968 
8969  // When V1 is a load, it can be folded later into a store in isel, example:
8970  // (store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), addr:$src1)
8971  // turns into:
8972  // (MOVLPSmr addr:$src1, VR128:$src2)
8973  // So, recognize this potential and also use MOVLPS or MOVLPD
8974  else if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
8975  CanFoldLoad = true;
8976 
8977  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
8978  if (CanFoldLoad) {
8979  if (HasSSE2 && NumElems == 2)
8980  return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
8981 
8982  if (NumElems == 4)
8983  // If we don't care about the second element, proceed to use movss.
8984  if (SVOp->getMaskElt(1) != -1)
8985  return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
8986  }
8987 
8988  // movl and movlp will both match v2i64, but v2i64 is never matched by
8989  // movl earlier because we make it strict to avoid messing with the movlp load
8990  // folding logic (see the code above getMOVLP call). Match it here then,
8991  // this is horrible, but will stay like this until we move all shuffle
8992  // matching to x86 specific nodes. Note that for the 1st condition all
8993  // types are matched with movsd.
8994  if (HasSSE2) {
8995  // FIXME: isMOVLMask should be checked and matched before getMOVLP,
8996  // as to remove this logic from here, as much as possible
8997  if (NumElems == 2 || !isMOVLMask(SVOp->getMask(), VT))
8998  return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
8999  return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
9000  }
9001 
9002  assert(VT != MVT::v4i32 && "unsupported shuffle type");
9003 
9004  // Invert the operand order and use SHUFPS to match it.
9005  return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V2, V1,
9006  getShuffleSHUFImmediate(SVOp), DAG);
9007 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
static bool MayFoldVectorLoad(SDValue V)
static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N)
static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, SDValue V1, SelectionDAG &DAG)
static bool MayFoldIntoStore(SDValue Op)
int getMaskElt(unsigned Idx) const
static bool isMOVLMask(ArrayRef< int > Mask, EVT VT)
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
const SDValue & getOperand(unsigned i) const
ArrayRef< int > getMask() const
static unsigned getNumOfConsecutiveZeros ( ShuffleVectorSDNode SVOp,
unsigned  NumElems,
bool  ZerosFromLeft,
SelectionDAG DAG,
unsigned  PreferredNum = -1U 
)
static

getNumOfConsecutiveZeros - Return the number of elements of a vector shuffle operation which come consecutively from a zero. The search can start in two different directions, from left or right. We count undefs as zeros until PreferredNum is reached.

Definition at line 5254 of file X86ISelLowering.cpp.

5257  {
5258  unsigned NumZeros = 0;
5259  for (unsigned i = 0; i != NumElems; ++i) {
5260  unsigned Index = ZerosFromLeft ? i : NumElems - i - 1;
5261  SDValue Elt = getShuffleScalarElt(SVOp, Index, DAG, 0);
5262  if (!Elt.getNode())
5263  break;
5264 
5265  if (X86::isZeroNode(Elt))
5266  ++NumZeros;
5267  else if (Elt.getOpcode() == ISD::UNDEF) // Undef as zero up to PreferredNum.
5268  NumZeros = std::min(NumZeros + 1, PreferredNum);
5269  else
5270  break;
5271  }
5272 
5273  return NumZeros;
5274 }
static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, unsigned Depth)
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
SDNode * getNode() const
get the SDNode which holds the desired result
unsigned getOpcode() const
bool isZeroNode(SDValue Elt)
static SDValue getOnesVector ( MVT  VT,
bool  HasInt256,
SelectionDAG DAG,
SDLoc  dl 
)
static

getOnesVector - Returns a vector of specified type with all bits set. Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately. Then bitcast to their original type, ensuring they get CSE'd.

Definition at line 4932 of file X86ISelLowering.cpp.

4933  {
4934  assert(VT.isVector() && "Expected a vector type");
4935 
4936  SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
4937  SDValue Vec;
4938  if (VT.is256BitVector()) {
4939  if (HasInt256) { // AVX2
4940  SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
4941  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
4942  } else { // AVX
4943  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
4944  Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
4945  }
4946  } else if (VT.is128BitVector()) {
4947  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
4948  } else
4949  llvm_unreachable("Unexpected vector type");
4950 
4951  return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
4952 }
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
assert(Globals.size() > 1)
bool isVector() const
isVector - Return true if this is a vector value type.
SDValue getTargetConstant(uint64_t Val, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:406
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT, unsigned NumElems, SelectionDAG &DAG, SDLoc dl)
static SDValue getPrefetchNode ( unsigned  Opc,
SDValue  Op,
SelectionDAG DAG,
SDValue  Mask,
SDValue  Base,
SDValue  Index,
SDValue  ScaleOp,
SDValue  Chain 
)
static

Definition at line 14254 of file X86ISelLowering.cpp.

14256  {
14257  SDLoc dl(Op);
14258  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
14259  assert(C && "Invalid scale type");
14260  SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
14261  SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
14262  SDValue Segment = DAG.getRegister(0, MVT::i32);
14263  EVT MaskVT =
14264  MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements());
14265  SDValue MaskInReg;
14266  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
14267  if (MaskC)
14268  MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), MaskVT);
14269  else
14270  MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
14271  //SDVTList VTs = DAG.getVTList(MVT::Other);
14272  SDValue Ops[] = {MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
14273  SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops);
14274  return SDValue(Res, 0);
14275 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
***NAME is the name of the raw_ostream unsigned & i1
SDValue getTargetConstant(uint64_t Val, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:406
int64_t getSExtValue() const
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:265
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
MachineSDNode * getMachineNode(unsigned Opcode, SDLoc dl, EVT VT)
SDValue getRegister(unsigned Reg, EVT VT)
uint64_t getZExtValue() const
static SDValue getPSHUFB ( ArrayRef< int >  MaskVals,
SDValue  V1,
SDLoc dl,
SelectionDAG DAG 
)
static

Generate a PSHUFB if possible. Selects elements from V1 according to MaskVals. MaskVals[OutputIdx] = InputIdx specifies that we want to shuffle the element at InputIdx in V1 to OutputIdx in the result. If MaskVals refers to elements outside of V1 or is undef (-1), insert a zero.

Definition at line 8097 of file X86ISelLowering.cpp.

8098  {
8099  MVT VT = V1.getSimpleValueType();
8100  assert(VT.is128BitVector() || VT.is256BitVector());
8101 
8102  MVT EltVT = VT.getVectorElementType();
8103  unsigned EltSizeInBytes = EltVT.getSizeInBits() / 8;
8104  unsigned NumElts = VT.getVectorNumElements();
8105 
8106  SmallVector<SDValue, 32> PshufbMask;
8107  for (unsigned OutputIdx = 0; OutputIdx < NumElts; ++OutputIdx) {
8108  int InputIdx = MaskVals[OutputIdx];
8109  unsigned InputByteIdx;
8110 
8111  if (InputIdx < 0 || NumElts <= (unsigned)InputIdx)
8112  InputByteIdx = 0x80;
8113  else {
8114  // Cross lane is not allowed.
8115  if (ShuffleCrosses128bitLane(VT, InputIdx, OutputIdx))
8116  return SDValue();
8117  InputByteIdx = InputIdx * EltSizeInBytes;
8118  // Index is an byte offset within the 128-bit lane.
8119  InputByteIdx &= 0xf;
8120  }
8121 
8122  for (unsigned j = 0; j < EltSizeInBytes; ++j) {
8123  PshufbMask.push_back(DAG.getConstant(InputByteIdx, MVT::i8));
8124  if (InputByteIdx != 0x80)
8125  ++InputByteIdx;
8126  }
8127  }
8128 
8129  MVT ShufVT = MVT::getVectorVT(MVT::i8, PshufbMask.size());
8130  if (ShufVT != VT)
8131  V1 = DAG.getNode(ISD::BITCAST, dl, ShufVT, V1);
8132  return DAG.getNode(X86ISD::PSHUFB, dl, ShufVT, V1,
8133  DAG.getNode(ISD::BUILD_VECTOR, dl, ShufVT, PshufbMask));
8134 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
PSHUFB - Shuffle 16 8-bit values within a vector.
static bool ShuffleCrosses128bitLane(MVT VT, unsigned InputIdx, unsigned OutputIdx)
unsigned getSizeInBits() const
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
MVT getVectorElementType() const
static SmallVector<int, 4> getPSHUFShuffleMask ( SDValue  N)
static

Get the PSHUF-style mask from PSHUF node.

This is a very minor wrapper around getTargetShuffleMask to ease forming v4 PSHUF-style masks that can be reused with such instructions.

Definition at line 18468 of file X86ISelLowering.cpp.

18468  {
18469  SmallVector<int, 4> Mask;
18470  bool IsUnary;
18471  bool HaveMask = getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), Mask, IsUnary);
18472  (void)HaveMask;
18473  assert(HaveMask);
18474 
18475  switch (N.getOpcode()) {
18476  case X86ISD::PSHUFD:
18477  return Mask;
18478  case X86ISD::PSHUFLW:
18479  Mask.resize(4);
18480  return Mask;
18481  case X86ISD::PSHUFHW:
18482  Mask.erase(Mask.begin(), Mask.begin() + 4);
18483  for (int &M : Mask)
18484  M -= 4;
18485  return Mask;
18486  default:
18487  llvm_unreachable("No valid shuffle instruction found!");
18488  }
18489 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
SDNode * getNode() const
get the SDNode which holds the desired result
assert(Globals.size() > 1)
unsigned getOpcode() const
iterator erase(iterator I)
Definition: SmallVector.h:450
void resize(unsigned N)
Definition: SmallVector.h:376
static bool getTargetShuffleMask(SDNode *N, MVT VT, SmallVectorImpl< int > &Mask, bool &IsUnary)
static void getReadPerformanceCounter ( SDNode N,
SDLoc  DL,
SelectionDAG DAG,
const X86Subtarget Subtarget,
SmallVectorImpl< SDValue > &  Results 
)
static

Definition at line 14279 of file X86ISelLowering.cpp.

14281  {
14282  assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
14283  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
14284  SDValue LO, HI;
14285 
14286  // The ECX register is used to select the index of the performance counter
14287  // to read.
14288  SDValue Chain = DAG.getCopyToReg(N->getOperand(0), DL, X86::ECX,
14289  N->getOperand(2));
14290  SDValue rd = DAG.getNode(X86ISD::RDPMC_DAG, DL, Tys, Chain);
14291 
14292  // Reads the content of a 64-bit performance counter and returns it in the
14293  // registers EDX:EAX.
14294  if (Subtarget->is64Bit()) {
14295  LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
14296  HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
14297  LO.getValue(2));
14298  } else {
14299  LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1));
14300  HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
14301  LO.getValue(2));
14302  }
14303  Chain = HI.getValue(1);
14304 
14305  if (Subtarget->is64Bit()) {
14306  // The EAX register is loaded with the low-order 32 bits. The EDX register
14307  // is loaded with the supported high-order bits of the counter.
14308  SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
14309  DAG.getConstant(32, MVT::i8));
14310  Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
14311  Results.push_back(Chain);
14312  return;
14313  }
14314 
14315  // Use a buildpair to merge the two 32-bit values into a 64-bit one.
14316  SDValue Ops[] = { LO, HI };
14317  SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
14318  Results.push_back(Pair);
14319  Results.push_back(Chain);
14320 }
SDValue getValue(unsigned R) const
SDValue getCopyToReg(SDValue Chain, SDLoc dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:486
unsigned getNumOperands() const
const SDValue & getOperand(unsigned Num) const
SDVTList getVTList(EVT VT)
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Definition: X86Subtarget.h:283
assert(Globals.size() > 1)
SDValue getCopyFromReg(SDValue Chain, SDLoc dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:511
X86 Read Performance Monitoring Counters.
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static void getReadTimeStampCounter ( SDNode N,
SDLoc  DL,
unsigned  Opcode,
SelectionDAG DAG,
const X86Subtarget Subtarget,
SmallVectorImpl< SDValue > &  Results 
)
static

Definition at line 14325 of file X86ISelLowering.cpp.

14327  {
14328  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
14329  SDValue rd = DAG.getNode(Opcode, DL, Tys, N->getOperand(0));
14330  SDValue LO, HI;
14331 
14332  // The processor's time-stamp counter (a 64-bit MSR) is stored into the
14333  // EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR
14334  // and the EAX register is loaded with the low-order 32 bits.
14335  if (Subtarget->is64Bit()) {
14336  LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
14337  HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
14338  LO.getValue(2));
14339  } else {
14340  LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1));
14341  HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
14342  LO.getValue(2));
14343  }
14344  SDValue Chain = HI.getValue(1);
14345 
14346  if (Opcode == X86ISD::RDTSCP_DAG) {
14347  assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
14348 
14349  // Instruction RDTSCP loads the IA32:TSC_AUX_MSR (address C000_0103H) into
14350  // the ECX register. Add 'ecx' explicitly to the chain.
14351  SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32,
14352  HI.getValue(2));
14353  // Explicitly store the content of ECX at the location passed in input
14354  // to the 'rdtscp' intrinsic.
14355  Chain = DAG.getStore(ecx.getValue(1), DL, ecx, N->getOperand(2),
14356  MachinePointerInfo(), false, false, 0);
14357  }
14358 
14359  if (Subtarget->is64Bit()) {
14360  // The EDX register is loaded with the high-order 32 bits of the MSR, and
14361  // the EAX register is loaded with the low-order 32 bits.
14362  SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
14363  DAG.getConstant(32, MVT::i8));
14364  Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
14365  Results.push_back(Chain);
14366  return;
14367  }
14368 
14369  // Use a buildpair to merge the two 32-bit values into a 64-bit one.
14370  SDValue Ops[] = { LO, HI };
14371  SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
14372  Results.push_back(Pair);
14373  Results.push_back(Chain);
14374 }
SDValue getValue(unsigned R) const
unsigned getNumOperands() const
const SDValue & getOperand(unsigned Num) const
SDVTList getVTList(EVT VT)
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Definition: X86Subtarget.h:283
assert(Globals.size() > 1)
SDValue getCopyFromReg(SDValue Chain, SDLoc dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:511
SDValue getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo=nullptr)
X86 Read Time-Stamp Counter and Processor ID.
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static SDValue getScatterNode ( unsigned  Opc,
SDValue  Op,
SelectionDAG DAG,
SDValue  Src,
SDValue  Mask,
SDValue  Base,
SDValue  Index,
SDValue  ScaleOp,
SDValue  Chain 
)
static

Definition at line 14231 of file X86ISelLowering.cpp.

14233  {
14234  SDLoc dl(Op);
14235  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
14236  assert(C && "Invalid scale type");
14237  SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
14238  SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
14239  SDValue Segment = DAG.getRegister(0, MVT::i32);
14240  EVT MaskVT = MVT::getVectorVT(MVT::i1,
14241                                Index.getSimpleValueType().getVectorNumElements());
14242  SDValue MaskInReg;
14243  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
14244  if (MaskC)
14245  MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), MaskVT);
14246  else
14247  MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
14248  SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
14249  SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
14250  SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
14251  return SDValue(Res, 1);
14252 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
SDVTList getVTList(EVT VT)
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
***NAME is the name of the raw_ostream unsigned & i1
SDValue getTargetConstant(uint64_t Val, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:406
int64_t getSExtValue() const
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:265
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
MachineSDNode * getMachineNode(unsigned Opcode, SDLoc dl, EVT VT)
SDValue getRegister(unsigned Reg, EVT VT)
uint64_t getZExtValue() const
static unsigned getShufflePALIGNRImmediate ( ShuffleVectorSDNode SVOp)
static

getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.

Definition at line 4673 of file X86ISelLowering.cpp.

4673  {
4674  MVT VT = SVOp->getSimpleValueType(0);
4675  unsigned EltSize = VT.is512BitVector() ? 1 :
4676  VT.getVectorElementType().getSizeInBits() >> 3;
4677 
4678  unsigned NumElts = VT.getVectorNumElements();
4679  unsigned NumLanes = VT.is512BitVector() ? 1 : VT.getSizeInBits()/128;
4680  unsigned NumLaneElts = NumElts/NumLanes;
4681 
4682  int Val = 0;
4683  unsigned i;
4684  for (i = 0; i != NumElts; ++i) {
4685  Val = SVOp->getMaskElt(i);
4686  if (Val >= 0)
4687  break;
4688  }
4689  if (Val >= (int)NumElts)
4690  Val -= NumElts - NumLaneElts;
4691 
4692  assert(Val - i > 0 && "PALIGNR imm should be positive");
4693  return (Val - i) * EltSize;
4694 }
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
unsigned getSizeInBits() const
int getMaskElt(unsigned Idx) const
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
MVT getVectorElementType() const
MVT getSimpleValueType(unsigned ResNo) const
static unsigned getShufflePSHUFHWImmediate ( ShuffleVectorSDNode N)
static

getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.

Definition at line 4625 of file X86ISelLowering.cpp.

4625  {
4626  MVT VT = N->getSimpleValueType(0);
4627 
4628  assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
4629  "Unsupported vector type for PSHUFHW");
4630 
4631  unsigned NumElts = VT.getVectorNumElements();
4632 
4633  unsigned Mask = 0;
4634  for (unsigned l = 0; l != NumElts; l += 8) {
4635  // 8 nodes per lane, but we only care about the last 4.
4636  for (unsigned i = 0; i < 4; ++i) {
4637  int Elt = N->getMaskElt(l+i+4);
4638  if (Elt < 0) continue;
4639  Elt &= 0x3; // only 2-bits.
4640  Mask |= Elt << (i * 2);
4641  }
4642  }
4643 
4644  return Mask;
4645 }
int getMaskElt(unsigned Idx) const
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
MVT getSimpleValueType(unsigned ResNo) const
static unsigned getShufflePSHUFLWImmediate ( ShuffleVectorSDNode N)
static

getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.

Definition at line 4649 of file X86ISelLowering.cpp.

4649  {
4650  MVT VT = N->getSimpleValueType(0);
4651 
4652  assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
4653  "Unsupported vector type for PSHUFHW");
4654 
4655  unsigned NumElts = VT.getVectorNumElements();
4656 
4657  unsigned Mask = 0;
4658  for (unsigned l = 0; l != NumElts; l += 8) {
4659  // 8 nodes per lane, but we only care about the first 4.
4660  for (unsigned i = 0; i < 4; ++i) {
4661  int Elt = N->getMaskElt(l+i);
4662  if (Elt < 0) continue;
4663  Elt &= 0x3; // only 2-bits
4664  Mask |= Elt << (i * 2);
4665  }
4666  }
4667 
4668  return Mask;
4669 }
int getMaskElt(unsigned Idx) const
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
MVT getSimpleValueType(unsigned ResNo) const
static SDValue getShuffleScalarElt ( SDNode N,
unsigned  Index,
SelectionDAG DAG,
unsigned  Depth 
)
static

getShuffleScalarElt - Returns the scalar element that will make up the ith element of the result of the vector shuffle.

Definition at line 5188 of file X86ISelLowering.cpp.

5189  {
5190  if (Depth == 6)
5191  return SDValue(); // Limit search depth.
5192 
5193  SDValue V = SDValue(N, 0);
5194  EVT VT = V.getValueType();
5195  unsigned Opcode = V.getOpcode();
5196 
5197  // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
5198  if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
5199  int Elt = SV->getMaskElt(Index);
5200 
5201  if (Elt < 0)
5202  return DAG.getUNDEF(VT.getVectorElementType());
5203 
5204  unsigned NumElems = VT.getVectorNumElements();
5205  SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
5206  : SV->getOperand(1);
5207  return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
5208  }
5209 
5210  // Recurse into target specific vector shuffles to find scalars.
5211  if (isTargetShuffle(Opcode)) {
5212  MVT ShufVT = V.getSimpleValueType();
5213  unsigned NumElems = ShufVT.getVectorNumElements();
5214  SmallVector<int, 16> ShuffleMask;
5215  bool IsUnary;
5216 
5217  if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary))
5218  return SDValue();
5219 
5220  int Elt = ShuffleMask[Index];
5221  if (Elt < 0)
5222  return DAG.getUNDEF(ShufVT.getVectorElementType());
5223 
5224  SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0)
5225  : N->getOperand(1);
5226  return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
5227  Depth+1);
5228  }
5229 
5230  // Actual nodes that may contain scalar elements
5231  if (Opcode == ISD::BITCAST) {
5232  V = V.getOperand(0);
5233  EVT SrcVT = V.getValueType();
5234  unsigned NumElems = VT.getVectorNumElements();
5235 
5236  if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
5237  return SDValue();
5238  }
5239 
5240  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
5241  return (Index == 0) ? V.getOperand(0)
5242  : DAG.getUNDEF(VT.getVectorElementType());
5243 
5244  if (V.getOpcode() == ISD::BUILD_VECTOR)
5245  return V.getOperand(Index);
5246 
5247  return SDValue();
5248 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, unsigned Depth)
const SDValue & getOperand(unsigned Num) const
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:116
EVT getVectorElementType() const
Definition: ValueTypes.h:217
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
static bool isTargetShuffle(unsigned Opcode)
SDNode * getNode() const
get the SDNode which holds the desired result
unsigned getVectorNumElements() const
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
EVT getValueType() const
static bool getTargetShuffleMask(SDNode *N, MVT VT, SmallVectorImpl< int > &Mask, bool &IsUnary)
MVT getVectorElementType() const
unsigned getVectorNumElements() const
Definition: ValueTypes.h:226
static unsigned getShuffleSHUFImmediate ( ShuffleVectorSDNode N)
static

getShuffleSHUFImmediate - Return the appropriate immediate to shuffle the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions. Handles 128-bit and 256-bit.

Definition at line 4595 of file X86ISelLowering.cpp.

4595  {
4596  MVT VT = N->getSimpleValueType(0);
4597 
4598  assert((VT.getSizeInBits() >= 128) &&
4599  "Unsupported vector type for PSHUF/SHUFP");
4600 
4601  // Handle 128 and 256-bit vector lengths. AVX defines PSHUF/SHUFP to operate
4602  // independently on 128-bit lanes.
4603  unsigned NumElts = VT.getVectorNumElements();
4604  unsigned NumLanes = VT.getSizeInBits()/128;
4605  unsigned NumLaneElts = NumElts/NumLanes;
4606 
4607  assert((NumLaneElts == 2 || NumLaneElts == 4 || NumLaneElts == 8) &&
4608  "Only supports 2, 4 or 8 elements per lane");
4609 
4610  unsigned Shift = (NumLaneElts >= 4) ? 1 : 0;
4611  unsigned Mask = 0;
4612  for (unsigned i = 0; i != NumElts; ++i) {
4613  int Elt = N->getMaskElt(i);
4614  if (Elt < 0) continue;
4615  Elt &= NumLaneElts - 1;
4616  unsigned ShAmt = (i << Shift) % 8;
4617  Mask |= Elt << ShAmt;
4618  }
4619 
4620  return Mask;
4621 }
unsigned getSizeInBits() const
int getMaskElt(unsigned Idx) const
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
MVT getSimpleValueType(unsigned ResNo) const
static SDValue getShuffleVectorZeroOrUndef ( SDValue  V2,
unsigned  Idx,
bool  IsZero,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified vector against a zero or undef vector. This produces a shuffle where the low element of V2 is swizzled into the zero/undef vector, landing at element Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).

Definition at line 5090 of file X86ISelLowering.cpp.

5093  {
5094  MVT VT = V2.getSimpleValueType();
5095  SDValue V1 = IsZero
5096  ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
5097  unsigned NumElems = VT.getVectorNumElements();
5098  SmallVector<int, 16> MaskVec;
5099  for (unsigned i = 0; i != NumElems; ++i)
5100  // If this is the insertion idx, put the low elt of V2 here.
5101  MaskVec.push_back(i == Idx ? NumElems : i);
5102  return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, &MaskVec[0]);
5103 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG, SDLoc dl)
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
unsigned getVectorNumElements() const
static unsigned getShuffleVPERM2X128Immediate ( ShuffleVectorSDNode SVOp)
static

getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle the specified VECTOR_SHUFFLE mask with VPERM2F128/VPERM2I128 instructions.

Definition at line 4334 of file X86ISelLowering.cpp.

4334  {
4335  MVT VT = SVOp->getSimpleValueType(0);
4336 
4337  unsigned HalfSize = VT.getVectorNumElements()/2;
4338 
4339  unsigned FstHalf = 0, SndHalf = 0;
4340  for (unsigned i = 0; i < HalfSize; ++i) {
4341  if (SVOp->getMaskElt(i) > 0) {
4342  FstHalf = SVOp->getMaskElt(i)/HalfSize;
4343  break;
4344  }
4345  }
4346  for (unsigned i = HalfSize; i < HalfSize*2; ++i) {
4347  if (SVOp->getMaskElt(i) > 0) {
4348  SndHalf = SVOp->getMaskElt(i)/HalfSize;
4349  break;
4350  }
4351  }
4352 
4353  return (FstHalf | (SndHalf << 4));
4354 }
int getMaskElt(unsigned Idx) const
unsigned getVectorNumElements() const
MVT getSimpleValueType(unsigned ResNo) const
static bool getTargetShuffleMask ( SDNode N,
MVT  VT,
SmallVectorImpl< int > &  Mask,
bool &  IsUnary 
)
static

getTargetShuffleMask - Calculates the shuffle mask corresponding to the target specific opcode. Returns true if the Mask could be calculated. Sets IsUnary to true if the shuffle uses only one source.

Definition at line 5108 of file X86ISelLowering.cpp.

5109  {
5110  unsigned NumElems = VT.getVectorNumElements();
5111  SDValue ImmN;
5112 
5113  IsUnary = false;
5114  switch(N->getOpcode()) {
5115  case X86ISD::SHUFP:
5116  ImmN = N->getOperand(N->getNumOperands()-1);
5117  DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5118  break;
5119  case X86ISD::UNPCKH:
5120  DecodeUNPCKHMask(VT, Mask);
5121  break;
5122  case X86ISD::UNPCKL:
5123  DecodeUNPCKLMask(VT, Mask);
5124  break;
5125  case X86ISD::MOVHLPS:
5126  DecodeMOVHLPSMask(NumElems, Mask);
5127  break;
5128  case X86ISD::MOVLHPS:
5129  DecodeMOVLHPSMask(NumElems, Mask);
5130  break;
5131  case X86ISD::PALIGNR:
5132  ImmN = N->getOperand(N->getNumOperands()-1);
5133  DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5134  break;
5135  case X86ISD::PSHUFD:
5136  case X86ISD::VPERMILP:
5137  ImmN = N->getOperand(N->getNumOperands()-1);
5138  DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5139  IsUnary = true;
5140  break;
5141  case X86ISD::PSHUFHW:
5142  ImmN = N->getOperand(N->getNumOperands()-1);
5143  DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5144  IsUnary = true;
5145  break;
5146  case X86ISD::PSHUFLW:
5147  ImmN = N->getOperand(N->getNumOperands()-1);
5148  DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5149  IsUnary = true;
5150  break;
5151  case X86ISD::VPERMI:
5152  ImmN = N->getOperand(N->getNumOperands()-1);
5153  DecodeVPERMMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5154  IsUnary = true;
5155  break;
5156  case X86ISD::MOVSS:
5157  case X86ISD::MOVSD: {
5158  // The index 0 always comes from the first element of the second source,
5159  // this is why MOVSS and MOVSD are used in the first place. The other
5160  // elements come from the other positions of the first source vector
5161  Mask.push_back(NumElems);
5162  for (unsigned i = 1; i != NumElems; ++i) {
5163  Mask.push_back(i);
5164  }
5165  break;
5166  }
5167  case X86ISD::VPERM2X128:
5168  ImmN = N->getOperand(N->getNumOperands()-1);
5169  DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5170  if (Mask.empty()) return false;
5171  break;
5172  case X86ISD::MOVDDUP:
5173  case X86ISD::MOVLHPD:
5174  case X86ISD::MOVLPD:
5175  case X86ISD::MOVLPS:
5176  case X86ISD::MOVSHDUP:
5177  case X86ISD::MOVSLDUP:
5178  // Not yet implemented
5179  return false;
5180  default: llvm_unreachable("unknown target shuffle node");
5181  }
5182 
5183  return true;
5184 }
void push_back(const T &Elt)
Definition: SmallVector.h:225
void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
unsigned getOpcode() const
void DecodeUNPCKLMask(MVT VT, SmallVectorImpl< int > &ShuffleMask)
unsigned getNumOperands() const
const SDValue & getOperand(unsigned Num) const
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Definition: SmallVector.h:57
void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl< int > &ShuffleMask)
unsigned getVectorNumElements() const
void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
void DecodePSHUFLWMask(MVT VT, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
void DecodeUNPCKHMask(MVT VT, SmallVectorImpl< int > &ShuffleMask)
void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
void DecodePSHUFHWMask(MVT VT, unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
void DecodeVPERMMask(unsigned Imm, SmallVectorImpl< int > &ShuffleMask)
void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl< int > &ShuffleMask)
static SDValue getTargetShuffleNode ( unsigned  Opc,
SDLoc  dl,
EVT  VT,
SDValue  V1,
SelectionDAG DAG 
)
static

Definition at line 3405 of file X86ISelLowering.cpp.

3406  {
3407  switch(Opc) {
3408  default: llvm_unreachable("Unknown x86 shuffle node");
3409  case X86ISD::MOVSHDUP:
3410  case X86ISD::MOVSLDUP:
3411  case X86ISD::MOVDDUP:
3412  return DAG.getNode(Opc, dl, VT, V1);
3413  }
3414 }
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static SDValue getTargetShuffleNode ( unsigned  Opc,
SDLoc  dl,
EVT  VT,
SDValue  V1,
unsigned  TargetMask,
SelectionDAG DAG 
)
static

Definition at line 3416 of file X86ISelLowering.cpp.

3418  {
3419  switch(Opc) {
3420  default: llvm_unreachable("Unknown x86 shuffle node");
3421  case X86ISD::PSHUFD:
3422  case X86ISD::PSHUFHW:
3423  case X86ISD::PSHUFLW:
3424  case X86ISD::VPERMILP:
3425  case X86ISD::VPERMI:
3426  return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
3427  }
3428 }
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static SDValue getTargetShuffleNode ( unsigned  Opc,
SDLoc  dl,
EVT  VT,
SDValue  V1,
SDValue  V2,
unsigned  TargetMask,
SelectionDAG DAG 
)
static

Definition at line 3430 of file X86ISelLowering.cpp.

3432  {
3433  switch(Opc) {
3434  default: llvm_unreachable("Unknown x86 shuffle node");
3435  case X86ISD::PALIGNR:
3436  case X86ISD::SHUFP:
3437  case X86ISD::VPERM2X128:
3438  return DAG.getNode(Opc, dl, VT, V1, V2,
3439  DAG.getConstant(TargetMask, MVT::i8));
3440  }
3441 }
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static SDValue getTargetShuffleNode ( unsigned  Opc,
SDLoc  dl,
EVT  VT,
SDValue  V1,
SDValue  V2,
SelectionDAG DAG 
)
static

Definition at line 3443 of file X86ISelLowering.cpp.

3444  {
3445  switch(Opc) {
3446  default: llvm_unreachable("Unknown x86 shuffle node");
3447  case X86ISD::MOVLHPS:
3448  case X86ISD::MOVLHPD:
3449  case X86ISD::MOVHLPS:
3450  case X86ISD::MOVLPS:
3451  case X86ISD::MOVLPD:
3452  case X86ISD::MOVSS:
3453  case X86ISD::MOVSD:
3454  case X86ISD::UNPCKL:
3455  case X86ISD::UNPCKH:
3456  return DAG.getNode(Opc, dl, VT, V1, V2);
3457  }
3458 }
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static SDValue getTargetVShiftByConstNode ( unsigned  Opc,
SDLoc  dl,
MVT  VT,
SDValue  SrcOp,
uint64_t  ShiftAmt,
SelectionDAG DAG 
)
static

Definition at line 13375 of file X86ISelLowering.cpp.

13377  {
13378  MVT ElementType = VT.getVectorElementType();
13379 
13380  // Fold this packed shift into its first operand if ShiftAmt is 0.
13381  if (ShiftAmt == 0)
13382  return SrcOp;
13383 
13384  // Check for ShiftAmt >= element width
13385  if (ShiftAmt >= ElementType.getSizeInBits()) {
13386  if (Opc == X86ISD::VSRAI)
13387  ShiftAmt = ElementType.getSizeInBits() - 1;
13388  else
13389  return DAG.getConstant(0, VT);
13390  }
13391 
13392  assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI)
13393  && "Unknown target vector shift-by-constant node");
13394 
13395  // Fold this packed vector shift into a build vector if SrcOp is a
13396  // vector of Constants or UNDEFs, and SrcOp valuetype is the same as VT.
13397  if (VT == SrcOp.getSimpleValueType() &&
13400  unsigned NumElts = SrcOp->getNumOperands();
13401  ConstantSDNode *ND;
13402 
13403  switch(Opc) {
13404  default: llvm_unreachable(nullptr);
13405  case X86ISD::VSHLI:
13406  for (unsigned i=0; i!=NumElts; ++i) {
13407  SDValue CurrentOp = SrcOp->getOperand(i);
13408  if (CurrentOp->getOpcode() == ISD::UNDEF) {
13409  Elts.push_back(CurrentOp);
13410  continue;
13411  }
13412  ND = cast<ConstantSDNode>(CurrentOp);
13413  const APInt &C = ND->getAPIntValue();
13414  Elts.push_back(DAG.getConstant(C.shl(ShiftAmt), ElementType));
13415  }
13416  break;
13417  case X86ISD::VSRLI:
13418  for (unsigned i=0; i!=NumElts; ++i) {
13419  SDValue CurrentOp = SrcOp->getOperand(i);
13420  if (CurrentOp->getOpcode() == ISD::UNDEF) {
13421  Elts.push_back(CurrentOp);
13422  continue;
13423  }
13424  ND = cast<ConstantSDNode>(CurrentOp);
13425  const APInt &C = ND->getAPIntValue();
13426  Elts.push_back(DAG.getConstant(C.lshr(ShiftAmt), ElementType));
13427  }
13428  break;
13429  case X86ISD::VSRAI:
13430  for (unsigned i=0; i!=NumElts; ++i) {
13431  SDValue CurrentOp = SrcOp->getOperand(i);
13432  if (CurrentOp->getOpcode() == ISD::UNDEF) {
13433  Elts.push_back(CurrentOp);
13434  continue;
13435  }
13436  ND = cast<ConstantSDNode>(CurrentOp);
13437  const APInt &C = ND->getAPIntValue();
13438  Elts.push_back(DAG.getConstant(C.ashr(ShiftAmt), ElementType));
13439  }
13440  break;
13441  }
13442 
13443  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Elts);
13444  }
13445 
13446  return DAG.getNode(Opc, dl, VT, SrcOp, DAG.getConstant(ShiftAmt, MVT::i8));
13447 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef...
unsigned getOpcode() const
unsigned getSizeInBits() const
unsigned getNumOperands() const
const SDValue & getOperand(unsigned Num) const
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
const APInt & getAPIntValue() const
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
SDNode * getNode() const
get the SDNode which holds the desired result
assert(Globals.size() > 1)
Class for arbitrary precision integers.
Definition: APInt.h:75
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
MVT getVectorElementType() const
static SDValue getTargetVShiftNode ( unsigned  Opc,
SDLoc  dl,
MVT  VT,
SDValue  SrcOp,
SDValue  ShAmt,
SelectionDAG DAG 
)
static

Definition at line 13451 of file X86ISelLowering.cpp.

13453  {
13454  assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");
13455 
13456  // Catch shift-by-constant.
13457  if (ConstantSDNode *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
13458  return getTargetVShiftByConstNode(Opc, dl, VT, SrcOp,
13459  CShAmt->getZExtValue(), DAG);
13460 
13461  // Change opcode to non-immediate version
13462  switch (Opc) {
13463  default: llvm_unreachable("Unknown target vector shift node");
13464  case X86ISD::VSHLI: Opc = X86ISD::VSHL; break;
13465  case X86ISD::VSRLI: Opc = X86ISD::VSRL; break;
13466  case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
13467  }
13468 
13469  // Need to build a vector containing shift amount
13470  // Shift amount is 32-bits, but SSE instructions read 64-bit, so fill with 0
13471  SDValue ShOps[4];
13472  ShOps[0] = ShAmt;
13473  ShOps[1] = DAG.getConstant(0, MVT::i32);
13474  ShOps[2] = ShOps[3] = DAG.getUNDEF(MVT::i32);
13475  ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, ShOps);
13476 
13477  // The return type has to be a 128-bit type with the same element
13478  // type as the input type.
13479  MVT EltVT = VT.getVectorElementType();
13480  EVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits());
13481 
13482  ShAmt = DAG.getNode(ISD::BITCAST, dl, ShVT, ShAmt);
13483  return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
13484 }
unsigned getSizeInBits() const
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
assert(Globals.size() > 1)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
EVT getValueType() const
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT, SDValue SrcOp, uint64_t ShiftAmt, SelectionDAG &DAG)
MVT getVectorElementType() const
static SDValue GetTLSADDR ( SelectionDAG DAG,
SDValue  Chain,
GlobalAddressSDNode GA,
SDValue InFlag,
const EVT  PtrVT,
unsigned  ReturnReg,
unsigned char  OperandFlags,
bool  LocalDynamic = false 
)
static

Definition at line 10339 of file X86ISelLowering.cpp.

10341  {
10343  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
10344  SDLoc dl(GA);
10345  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
10346  GA->getValueType(0),
10347  GA->getOffset(),
10348  OperandFlags);
10349 
10351  : X86ISD::TLSADDR;
10352 
10353  if (InFlag) {
10354  SDValue Ops[] = { Chain, TGA, *InFlag };
10355  Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
10356  } else {
10357  SDValue Ops[] = { Chain, TGA };
10358  Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
10359  }
10360 
10361  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
10362  MFI->setAdjustsStack(true);
10363 
10364  SDValue Flag = Chain.getValue(1);
10365  return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
10366 }
SDValue getValue(unsigned R) const
const GlobalValue * getGlobal() const
EVT getValueType(unsigned ResNo) const
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:276
SDValue getTargetGlobalAddress(const GlobalValue *GV, SDLoc DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:433
Abstract Stack Frame Information.
SDVTList getVTList(EVT VT)
SDValue getCopyFromReg(SDValue Chain, SDLoc dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:511
MachineFrameInfo * getFrameInfo()
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static int getUnderlyingExtractedFromVec ( SDValue ExtractedFromVec,
SDValue  ExtIdx 
)
static

For an EXTRACT_VECTOR_ELT with a constant index return the real underlying vector and index.

Modifies ExtractedFromVec to the real vector and returns the real index.

Definition at line 5883 of file X86ISelLowering.cpp.

5884  {
5885  int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
5886  if (!isa<ShuffleVectorSDNode>(ExtractedFromVec))
5887  return Idx;
5888 
5889  // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already
5890  // lowered this:
5891  // (extract_vector_elt (v8f32 %vreg1), Constant<6>)
5892  // to:
5893  // (extract_vector_elt (vector_shuffle<2,u,u,u>
5894  // (extract_subvector (v8f32 %vreg0), Constant<4>),
5895  // undef)
5896  // Constant<0>)
5897  // In this case the vector is the extract_subvector expression and the index
5898  // is 2, as specified by the shuffle.
5899  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec);
5900  SDValue ShuffleVec = SVOp->getOperand(0);
5901  MVT ShuffleVecVT = ShuffleVec.getSimpleValueType();
5902  assert(ShuffleVecVT.getVectorElementType() ==
5903  ExtractedFromVec.getSimpleValueType().getVectorElementType());
5904 
5905  int ShuffleIdx = SVOp->getMaskElt(Idx);
5906  if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) {
5907  ExtractedFromVec = ShuffleVec;
5908  return ShuffleIdx;
5909  }
5910  return Idx;
5911 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
const SDValue & getOperand(unsigned Num) const
static bool isUndefOrInRange(int Val, int Low, int Hi)
int getMaskElt(unsigned Idx) const
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
MVT getVectorElementType() const
static SDValue getUnpackh ( SelectionDAG DAG,
SDLoc  dl,
MVT  VT,
SDValue  V1,
SDValue  V2 
)
static

getUnpackh - Returns a vector_shuffle node for an unpackh operation.

Definition at line 4989 of file X86ISelLowering.cpp.

4990  {
4991  unsigned NumElems = VT.getVectorNumElements();
4992  SmallVector<int, 8> Mask;
4993  for (unsigned i = 0, Half = NumElems/2; i != Half; ++i) {
4994  Mask.push_back(i + Half);
4995  Mask.push_back(i + NumElems + Half);
4996  }
4997  return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
4998 }
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
unsigned getVectorNumElements() const
static SDValue getUnpackl ( SelectionDAG DAG,
SDLoc  dl,
MVT  VT,
SDValue  V1,
SDValue  V2 
)
static

getUnpackl - Returns a vector_shuffle node for an unpackl operation.

Definition at line 4977 of file X86ISelLowering.cpp.

4978  {
4979  unsigned NumElems = VT.getVectorNumElements();
4980  SmallVector<int, 8> Mask;
4981  for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
4982  Mask.push_back(i);
4983  Mask.push_back(i + NumElems);
4984  }
4985  return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
4986 }
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
unsigned getVectorNumElements() const
static SDValue getV4X86ShuffleImm8ForMask ( ArrayRef< int >  Mask,
SelectionDAG DAG 
)
static

Get a 4-lane 8-bit shuffle immediate for a mask.

This helper function produces an 8-bit shuffle immediate corresponding to the ubiquitous shuffle encoding scheme used in x86 instructions for shuffling 4 lanes. It can be used with most of the PSHUF instructions for example.

NB: We rely heavily on "undef" masks preserving the input lane.

Definition at line 6928 of file X86ISelLowering.cpp.

6929  {
6930  assert(Mask.size() == 4 && "Only 4-lane shuffle masks");
6931  assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!");
6932  assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!");
6933  assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!");
6934  assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!");
6935 
6936  unsigned Imm = 0;
6937  Imm |= (Mask[0] == -1 ? 0 : Mask[0]) << 0;
6938  Imm |= (Mask[1] == -1 ? 1 : Mask[1]) << 2;
6939  Imm |= (Mask[2] == -1 ? 2 : Mask[2]) << 4;
6940  Imm |= (Mask[3] == -1 ? 3 : Mask[3]) << 6;
6941  return DAG.getConstant(Imm, MVT::i8);
6942 }
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:109
assert(Globals.size() > 1)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static SDValue getVShift ( bool  isLeft,
EVT  VT,
SDValue  SrcOp,
unsigned  NumBits,
SelectionDAG DAG,
const TargetLowering TLI,
SDLoc  dl 
)
static

getVShift - Return a vector logical shift node.

Definition at line 5547 of file X86ISelLowering.cpp.

5549  {
5550  assert(VT.is128BitVector() && "Unknown type for VShift");
5551  EVT ShVT = MVT::v2i64;
5552  unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
5553  SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
5554  return DAG.getNode(ISD::BITCAST, dl, VT,
5555  DAG.getNode(Opc, dl, ShVT, SrcOp,
5556  DAG.getConstant(NumBits,
5557  TLI.getScalarShiftAmountTy(SrcOp.getValueType()))));
5558 }
virtual MVT getScalarShiftAmountTy(EVT LHSTy) const
assert(Globals.size() > 1)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
EVT getValueType() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:136
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static SDValue getVZextMovL ( MVT  VT,
MVT  OpVT,
SDValue  SrcOp,
SelectionDAG DAG,
const X86Subtarget Subtarget,
SDLoc  dl 
)
static

getVZextMovL - Return a zero-extending vector move low node.

Definition at line 8604 of file X86ISelLowering.cpp.

8606  {
8607  if (VT == MVT::v2f64 || VT == MVT::v4f32) {
8608  LoadSDNode *LD = nullptr;
8609  if (!isScalarLoadToVector(SrcOp.getNode(), &LD))
8610  LD = dyn_cast<LoadSDNode>(SrcOp);
8611  if (!LD) {
8612  // movssrr and movsdrr do not clear top bits. Try to use movd, movq
8613  // instead.
8614  MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
8615  if ((ExtVT != MVT::i64 || Subtarget->is64Bit()) &&
8616  SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
8617  SrcOp.getOperand(0).getOpcode() == ISD::BITCAST &&
8618  SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) {
8619  // PR2108
8620  OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
8621  return DAG.getNode(ISD::BITCAST, dl, VT,
8622  DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
8624  OpVT,
8625  SrcOp.getOperand(0)
8626  .getOperand(0))));
8627  }
8628  }
8629  }
8630 
8631  return DAG.getNode(ISD::BITCAST, dl, VT,
8632  DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
8633  DAG.getNode(ISD::BITCAST, dl,
8634  OpVT, SrcOp)));
8635 }
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Definition: X86Subtarget.h:283
SDNode * getNode() const
get the SDNode which holds the desired result
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD=nullptr)
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:265
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
EVT getValueType() const
static SDValue getZeroVector ( EVT  VT,
const X86Subtarget Subtarget,
SelectionDAG DAG,
SDLoc  dl 
)
static

getZeroVector - Returns a vector of specified type with all zero elements.

Definition at line 4885 of file X86ISelLowering.cpp.

4886  {
4887  assert(VT.isVector() && "Expected a vector type");
4888 
4889  // Always build SSE zero vectors as <4 x i32> bitcasted
4890  // to their dest type. This ensures they get CSE'd.
4891  SDValue Vec;
4892  if (VT.is128BitVector()) { // SSE
4893  if (Subtarget->hasSSE2()) { // SSE2
4894  SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
4895  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
4896  } else { // SSE1
4897  SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
4898  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
4899  }
4900  } else if (VT.is256BitVector()) { // AVX
4901  if (Subtarget->hasInt256()) { // AVX2
4902  SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
4903  SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
4904  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
4905  } else {
4906  // 256-bit logic and arithmetic instructions in AVX are all
4907  // floating-point, no support for integer ops. Emit fp zeroed vectors.
4908  SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
4909  SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
4910  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops);
4911  }
4912  } else if (VT.is512BitVector()) { // AVX-512
4913  SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
4914  SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
4915  Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
4916  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
4917  } else if (VT.getScalarType() == MVT::i1) {
4918  assert(VT.getVectorNumElements() <= 16 && "Unexpected vector type");
4919  SDValue Cst = DAG.getTargetConstant(0, MVT::i1);
4921  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
4922  } else
4923  llvm_unreachable("Unexpected vector type");
4924 
4925  return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
4926 }
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:116
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
EVT getScalarType() const
Definition: ValueTypes.h:211
bool hasSSE2() const
Definition: X86Subtarget.h:312
assert(Globals.size() > 1)
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:141
SDValue getTargetConstantFP(double Val, EVT VT)
Definition: SelectionDAG.h:421
***NAME is the name of the raw_ostream unsigned & i1
SDValue getTargetConstant(uint64_t Val, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:406
bool hasInt256() const
Definition: X86Subtarget.h:321
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:136
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
Definition: ValueTypes.h:146
unsigned getVectorNumElements() const
Definition: ValueTypes.h:226
static bool hasFPCMov ( unsigned  X86CC)
static

hasFPCMov - is there a floating point cmov for the specific X86 condition code. Current x86 isa includes the following FP cmov instructions: fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.

Definition at line 3643 of file X86ISelLowering.cpp.

3643  {
3644  switch (X86CC) {
3645  default:
3646  return false;
3647  case X86::COND_B:
3648  case X86::COND_BE:
3649  case X86::COND_E:
3650  case X86::COND_P:
3651  case X86::COND_A:
3652  case X86::COND_AE:
3653  case X86::COND_NE:
3654  case X86::COND_NP:
3655  return true;
3656  }
3657 }
static bool hasNonFlagsUse ( SDValue  Op)
static

return true if Op has a use that doesn't just read flags.

Definition at line 11634 of file X86ISelLowering.cpp.

11634  {
11635  for (SDNode::use_iterator UI = Op->use_begin(), UE = Op->use_end(); UI != UE;
11636  ++UI) {
11637  SDNode *User = *UI;
11638  unsigned UOpNo = UI.getOperandNo();
11639  if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
11640  // Look pass truncate.
11641  UOpNo = User->use_begin().getOperandNo();
11642  User = *User->use_begin();
11643  }
11644 
11645  if (User->getOpcode() != ISD::BRCOND && User->getOpcode() != ISD::SETCC &&
11646  !(User->getOpcode() == ISD::SELECT && UOpNo == 0))
11647  return true;
11648  }
11649  return false;
11650 }
bool hasOneUse() const
unsigned getOpcode() const
unsigned getOperandNo() const
use_iterator use_begin() const
static use_iterator use_end()
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:368
static void InitIntinsicsMap ( )
static

Definition at line 14398 of file X86ISelLowering.cpp.

14398  {
14399  static bool Initialized = false;
14400  if (Initialized)
14401  return;
14402  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qps_512,
14403  IntrinsicData(GATHER, X86::VGATHERQPSZrm, 0)));
14404  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qps_512,
14405  IntrinsicData(GATHER, X86::VGATHERQPSZrm, 0)));
14406  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qpd_512,
14407  IntrinsicData(GATHER, X86::VGATHERQPDZrm, 0)));
14408  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dpd_512,
14409  IntrinsicData(GATHER, X86::VGATHERDPDZrm, 0)));
14410  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dps_512,
14411  IntrinsicData(GATHER, X86::VGATHERDPSZrm, 0)));
14412  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qpi_512,
14413  IntrinsicData(GATHER, X86::VPGATHERQDZrm, 0)));
14414  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qpq_512,
14415  IntrinsicData(GATHER, X86::VPGATHERQQZrm, 0)));
14416  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dpi_512,
14417  IntrinsicData(GATHER, X86::VPGATHERDDZrm, 0)));
14418  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dpq_512,
14419  IntrinsicData(GATHER, X86::VPGATHERDQZrm, 0)));
14420 
14421  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qps_512,
14422  IntrinsicData(SCATTER, X86::VSCATTERQPSZmr, 0)));
14423  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpd_512,
14424  IntrinsicData(SCATTER, X86::VSCATTERQPDZmr, 0)));
14425  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpd_512,
14426  IntrinsicData(SCATTER, X86::VSCATTERDPDZmr, 0)));
14427  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dps_512,
14428  IntrinsicData(SCATTER, X86::VSCATTERDPSZmr, 0)));
14429  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpi_512,
14430  IntrinsicData(SCATTER, X86::VPSCATTERQDZmr, 0)));
14431  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpq_512,
14432  IntrinsicData(SCATTER, X86::VPSCATTERQQZmr, 0)));
14433  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpi_512,
14434  IntrinsicData(SCATTER, X86::VPSCATTERDDZmr, 0)));
14435  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpq_512,
14436  IntrinsicData(SCATTER, X86::VPSCATTERDQZmr, 0)));
14437 
14438  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_qps_512,
14439  IntrinsicData(PREFETCH, X86::VGATHERPF0QPSm,
14440  X86::VGATHERPF1QPSm)));
14441  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_qpd_512,
14442  IntrinsicData(PREFETCH, X86::VGATHERPF0QPDm,
14443  X86::VGATHERPF1QPDm)));
14444  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_dpd_512,
14445  IntrinsicData(PREFETCH, X86::VGATHERPF0DPDm,
14446  X86::VGATHERPF1DPDm)));
14447  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_dps_512,
14448  IntrinsicData(PREFETCH, X86::VGATHERPF0DPSm,
14449  X86::VGATHERPF1DPSm)));
14450  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_qps_512,
14451  IntrinsicData(PREFETCH, X86::VSCATTERPF0QPSm,
14452  X86::VSCATTERPF1QPSm)));
14453  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_qpd_512,
14454  IntrinsicData(PREFETCH, X86::VSCATTERPF0QPDm,
14455  X86::VSCATTERPF1QPDm)));
14456  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_dpd_512,
14457  IntrinsicData(PREFETCH, X86::VSCATTERPF0DPDm,
14458  X86::VSCATTERPF1DPDm)));
14459  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_dps_512,
14460  IntrinsicData(PREFETCH, X86::VSCATTERPF0DPSm,
14461  X86::VSCATTERPF1DPSm)));
14462  IntrMap.insert(std::make_pair(Intrinsic::x86_rdrand_16,
14463  IntrinsicData(RDRAND, X86ISD::RDRAND, 0)));
14464  IntrMap.insert(std::make_pair(Intrinsic::x86_rdrand_32,
14465  IntrinsicData(RDRAND, X86ISD::RDRAND, 0)));
14466  IntrMap.insert(std::make_pair(Intrinsic::x86_rdrand_64,
14467  IntrinsicData(RDRAND, X86ISD::RDRAND, 0)));
14468  IntrMap.insert(std::make_pair(Intrinsic::x86_rdseed_16,
14469  IntrinsicData(RDSEED, X86ISD::RDSEED, 0)));
14470  IntrMap.insert(std::make_pair(Intrinsic::x86_rdseed_32,
14471  IntrinsicData(RDSEED, X86ISD::RDSEED, 0)));
14472  IntrMap.insert(std::make_pair(Intrinsic::x86_rdseed_64,
14473  IntrinsicData(RDSEED, X86ISD::RDSEED, 0)));
14474  IntrMap.insert(std::make_pair(Intrinsic::x86_xtest,
14475  IntrinsicData(XTEST, X86ISD::XTEST, 0)));
14476  IntrMap.insert(std::make_pair(Intrinsic::x86_rdtsc,
14477  IntrinsicData(RDTSC, X86ISD::RDTSC_DAG, 0)));
14478  IntrMap.insert(std::make_pair(Intrinsic::x86_rdtscp,
14479  IntrinsicData(RDTSC, X86ISD::RDTSCP_DAG, 0)));
14480  IntrMap.insert(std::make_pair(Intrinsic::x86_rdpmc,
14481  IntrinsicData(RDPMC, X86ISD::RDPMC_DAG, 0)));
14482  Initialized = true;
14483 }
X86 Read Performance Monitoring Counters.
std::map< unsigned, IntrinsicData > IntrMap
X86 Read Time-Stamp Counter and Processor ID.
static SDValue Insert128BitVector ( SDValue  Result,
SDValue  Vec,
unsigned  IdxVal,
SelectionDAG DAG,
SDLoc  dl 
)
static

Generate a DAG to put 128-bits into a vector > 128 bits. This sets things up to match to an AVX VINSERTF128/VINSERTI128 or AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a simple superregister reference. Idx is an index in the 128 bits we want. It need not be aligned to a 128-bit boundary. That makes lowering INSERT_VECTOR_ELT operations easier.

Definition at line 162 of file X86ISelLowering.cpp.

164  {
165  assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
166  return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
167 }
assert(Globals.size() > 1)
EVT getValueType() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:136
static SDValue InsertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl, unsigned vectorWidth)
static SDValue Insert256BitVector ( SDValue  Result,
SDValue  Vec,
unsigned  IdxVal,
SelectionDAG DAG,
SDLoc  dl 
)
static

Definition at line 169 of file X86ISelLowering.cpp.

171  {
172  assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
173  return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
174 }
assert(Globals.size() > 1)
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:141
EVT getValueType() const
static SDValue InsertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl, unsigned vectorWidth)
static SDValue InsertSubVector ( SDValue  Result,
SDValue  Vec,
unsigned  IdxVal,
SelectionDAG DAG,
SDLoc  dl,
unsigned  vectorWidth 
)
static

Definition at line 132 of file X86ISelLowering.cpp.

134  {
135  assert((vectorWidth == 128 || vectorWidth == 256) &&
136  "Unsupported vector width");
137  // Inserting UNDEF is Result
138  if (Vec.getOpcode() == ISD::UNDEF)
139  return Result;
140  EVT VT = Vec.getValueType();
141  EVT ElVT = VT.getVectorElementType();
142  EVT ResultVT = Result.getValueType();
143 
144  // Insert the relevant vectorWidth bits.
145  unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
146 
147  // This is the index of the first element of the vectorWidth-bit chunk
148  // we want.
149  unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
150  * ElemsPerChunk);
151 
152  SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
153  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
154  VecIdx);
155 }
EVT getVectorElementType() const
Definition: ValueTypes.h:217
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
assert(Globals.size() > 1)
unsigned getOpcode() const
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
EVT getValueType() const
static bool isAllOnes ( SDValue  V)
static

Definition at line 11949 of file X86ISelLowering.cpp.

11949  {
11950  ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
11951  return C && C->isAllOnesValue();
11952 }
bool isAllOnesValue() const
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:265
static bool isAndOrOfSetCCs ( SDValue  Op,
unsigned &  Opc 
)
static

Definition at line 12817 of file X86ISelLowering.cpp.

12817  {
12818  Opc = Op.getOpcode();
12819  if (Opc != ISD::OR && Opc != ISD::AND)
12820  return false;
12821  return (Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
12822  Op.getOperand(0).hasOneUse() &&
12823  Op.getOperand(1).getOpcode() == X86ISD::SETCC &&
12824  Op.getOperand(1).hasOneUse());
12825 }
bool hasOneUse() const
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
static bool isBlendMask ( ArrayRef< int >  MaskVals,
MVT  VT,
bool  hasSSE41,
bool  hasInt256,
unsigned *  MaskOut = nullptr 
)
static

Definition at line 8009 of file X86ISelLowering.cpp.

8010  {
8011  MVT EltVT = VT.getVectorElementType();
8012 
8013  // There is no blend with immediate in AVX-512.
8014  if (VT.is512BitVector())
8015  return false;
8016 
8017  if (!hasSSE41 || EltVT == MVT::i8)
8018  return false;
8019  if (!hasInt256 && VT == MVT::v16i16)
8020  return false;
8021 
8022  unsigned MaskValue = 0;
8023  unsigned NumElems = VT.getVectorNumElements();
8024  // There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
8025  unsigned NumLanes = (NumElems - 1) / 8 + 1;
8026  unsigned NumElemsInLane = NumElems / NumLanes;
8027 
8028  // Blend for v16i16 should be symmetric for both lanes.
8029  for (unsigned i = 0; i < NumElemsInLane; ++i) {
8030 
8031  int SndLaneEltIdx = (NumLanes == 2) ? MaskVals[i + NumElemsInLane] : -1;
8032  int EltIdx = MaskVals[i];
8033 
8034  if ((EltIdx < 0 || EltIdx == (int)i) &&
8035  (SndLaneEltIdx < 0 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
8036  continue;
8037 
8038  if (((unsigned)EltIdx == (i + NumElems)) &&
8039  (SndLaneEltIdx < 0 ||
8040  (unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))
8041  MaskValue |= (1 << i);
8042  else
8043  return false;
8044  }
8045 
8046  if (MaskOut)
8047  *MaskOut = MaskValue;
8048  return true;
8049 }
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
unsigned getVectorNumElements() const
MVT getVectorElementType() const
static bool IsCCallConvention ( CallingConv::ID  CC)
static

Return true if the calling convention is a C calling convention.

Definition at line 2178 of file X86ISelLowering.cpp.

2178  {
2179  return (CC == CallingConv::C || CC == CallingConv::X86_64_Win64 ||
2180  CC == CallingConv::X86_64_SysV);
2181 }
The C convention as specified in the x86-64 supplement to the System V ABI, used on most non-Windows ...
Definition: CallingConv.h:134
The C convention as implemented on Windows/x86-64. This convention differs from the more common X86_6...
Definition: CallingConv.h:140
static bool isCommutedMOVLMask ( ArrayRef< int >  Mask,
MVT  VT,
bool  V2IsSplat = false,
bool  V2IsUndef = false 
)
static

isCommutedMOVLMask - Returns true if the shuffle mask is the reverse of what x86 movss wants. X86 movs requires the lowest element to be the lowest element of vector 2 and the other elements to come from vector 1 in order.

Definition at line 4433 of file X86ISelLowering.cpp.

4434  {
4435  if (!VT.is128BitVector())
4436  return false;
4437 
4438  unsigned NumOps = VT.getVectorNumElements();
4439  if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
4440  return false;
4441 
4442  if (!isUndefOrEqual(Mask[0], 0))
4443  return false;
4444 
4445  for (unsigned i = 1; i != NumOps; ++i)
4446  if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
4447  (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
4448  (V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
4449  return false;
4450 
4451  return true;
4452 }
static bool isUndefOrEqual(int Val, int CmpVal)
static bool isUndefOrInRange(int Val, int Low, int Hi)
unsigned getVectorNumElements() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
static bool isHorizontalBinOp ( const BuildVectorSDNode N,
unsigned  Opcode,
SelectionDAG DAG,
unsigned  BaseIdx,
unsigned  LastIdx,
SDValue V0,
SDValue V1 
)
static

Return true if N implements a horizontal binop and return the operands for the horizontal binop into V0 and V1.

This is a helper function of PerformBUILD_VECTORCombine. This function checks that the build_vector N in input implements a horizontal operation. Parameter Opcode defines the kind of horizontal operation to match. For example, if Opcode is equal to ISD::ADD, then this function checks if N implements a horizontal arithmetic add; if instead Opcode is equal to ISD::SUB, then this function checks if this is a horizontal arithmetic sub.

This function only analyzes elements of N whose indices are in range [BaseIdx, LastIdx).

Definition at line 6079 of file X86ISelLowering.cpp.

6082  {
6083  EVT VT = N->getValueType(0);
6084 
6085  assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
6086  assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
6087  "Invalid Vector in input!");
6088 
6089  bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
6090  bool CanFold = true;
6091  unsigned ExpectedVExtractIdx = BaseIdx;
6092  unsigned NumElts = LastIdx - BaseIdx;
6093  V0 = DAG.getUNDEF(VT);
6094  V1 = DAG.getUNDEF(VT);
6095 
6096  // Check if N implements a horizontal binop.
6097  for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
6098  SDValue Op = N->getOperand(i + BaseIdx);
6099 
6100  // Skip UNDEFs.
6101  if (Op->getOpcode() == ISD::UNDEF) {
6102  // Update the expected vector extract index.
6103  if (i * 2 == NumElts)
6104  ExpectedVExtractIdx = BaseIdx;
6105  ExpectedVExtractIdx += 2;
6106  continue;
6107  }
6108 
6109  CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();
6110 
6111  if (!CanFold)
6112  break;
6113 
6114  SDValue Op0 = Op.getOperand(0);
6115  SDValue Op1 = Op.getOperand(1);
6116 
6117  // Try to match the following pattern:
6118  // (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1))
6119  CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6120  Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6121  Op0.getOperand(0) == Op1.getOperand(0) &&
6122  isa<ConstantSDNode>(Op0.getOperand(1)) &&
6123  isa<ConstantSDNode>(Op1.getOperand(1)));
6124  if (!CanFold)
6125  break;
6126 
6127  unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
6128  unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue();
6129 
6130  if (i * 2 < NumElts) {
6131  if (V0.getOpcode() == ISD::UNDEF)
6132  V0 = Op0.getOperand(0);
6133  } else {
6134  if (V1.getOpcode() == ISD::UNDEF)
6135  V1 = Op0.getOperand(0);
6136  if (i * 2 == NumElts)
6137  ExpectedVExtractIdx = BaseIdx;
6138  }
6139 
6140  SDValue Expected = (i * 2 < NumElts) ? V0 : V1;
6141  if (I0 == ExpectedVExtractIdx)
6142  CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected;
6143  else if (IsCommutable && I1 == ExpectedVExtractIdx) {
6144  // Try to match the following dag sequence:
6145  // (BINOP (extract_vector_elt A, I+1), (extract_vector_elt A, I))
6146  CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected;
6147  } else
6148  CanFold = false;
6149 
6150  ExpectedVExtractIdx += 2;
6151  }
6152 
6153  return CanFold;
6154 }
bool hasOneUse() const
unsigned getOpcode() const
const SDValue & getOperand(unsigned Num) const
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:116
EVT getValueType(unsigned ResNo) const
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
Simple binary floating point operators.
Definition: ISDOpcodes.h:227
unsigned getOpcode() const
unsigned getVectorNumElements() const
Definition: ValueTypes.h:226
static bool isHorizontalBinOp ( SDValue LHS,
SDValue RHS,
bool  IsCommutative 
)
static

isHorizontalBinOp - Return 'true' if this vector operation is "horizontal" and return the operands for the horizontal operation in LHS and RHS. A horizontal operation performs the binary operation on successive elements of its first operand, then on successive elements of its second operand, returning the resulting values in a vector. For example, if A = < float a0, float a1, float a2, float a3 > and B = < float b0, float b1, float b2, float b3 > then the result of doing a horizontal operation on A and B is A horizontal-op B = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >. In short, LHS and RHS are inspected to see if LHS op RHS is of the form A horizontal-op B, for some already available A and B, and if so then LHS is set to A, RHS to B, and the routine returns 'true'. Note that the binary operation should have the property that if one of the operands is UNDEF then the result is UNDEF.

Definition at line 21305 of file X86ISelLowering.cpp.

21305  {
21306  // Look for the following pattern: if
21307  // A = < float a0, float a1, float a2, float a3 >
21308  // B = < float b0, float b1, float b2, float b3 >
21309  // and
21310  // LHS = VECTOR_SHUFFLE A, B, <0, 2, 4, 6>
21311  // RHS = VECTOR_SHUFFLE A, B, <1, 3, 5, 7>
21312  // then LHS op RHS = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >
21313  // which is A horizontal-op B.
21314 
21315  // At least one of the operands should be a vector shuffle.
21316  if (LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
21317  RHS.getOpcode() != ISD::VECTOR_SHUFFLE)
21318  return false;
21319 
21320  MVT VT = LHS.getSimpleValueType();
21321 
21322  assert((VT.is128BitVector() || VT.is256BitVector()) &&
21323  "Unsupported vector type for horizontal add/sub");
21324 
21325  // Handle 128 and 256-bit vector lengths. AVX defines horizontal add/sub to
21326  // operate independently on 128-bit lanes.
21327  unsigned NumElts = VT.getVectorNumElements();
21328  unsigned NumLanes = VT.getSizeInBits()/128;
21329  unsigned NumLaneElts = NumElts / NumLanes;
21330  assert((NumLaneElts % 2 == 0) &&
21331  "Vector type should have an even number of elements in each lane");
21332  unsigned HalfLaneElts = NumLaneElts/2;
21333 
21334  // View LHS in the form
21335  // LHS = VECTOR_SHUFFLE A, B, LMask
21336  // If LHS is not a shuffle then pretend it is the shuffle
21337  // LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1>
21338  // NOTE: in what follows a default initialized SDValue represents an UNDEF of
21339  // type VT.
21340  SDValue A, B;
21341  SmallVector<int, 16> LMask(NumElts);
21342  if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
21343  if (LHS.getOperand(0).getOpcode() != ISD::UNDEF)
21344  A = LHS.getOperand(0);
21345  if (LHS.getOperand(1).getOpcode() != ISD::UNDEF)
21346  B = LHS.getOperand(1);
21347  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(LHS.getNode())->getMask();
21348  std::copy(Mask.begin(), Mask.end(), LMask.begin());
21349  } else {
21350  if (LHS.getOpcode() != ISD::UNDEF)
21351  A = LHS;
21352  for (unsigned i = 0; i != NumElts; ++i)
21353  LMask[i] = i;
21354  }
21355 
21356  // Likewise, view RHS in the form
21357  // RHS = VECTOR_SHUFFLE C, D, RMask
21358  SDValue C, D;
21359  SmallVector<int, 16> RMask(NumElts);
21360  if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
21361  if (RHS.getOperand(0).getOpcode() != ISD::UNDEF)
21362  C = RHS.getOperand(0);
21363  if (RHS.getOperand(1).getOpcode() != ISD::UNDEF)
21364  D = RHS.getOperand(1);
21365  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(RHS.getNode())->getMask();
21366  std::copy(Mask.begin(), Mask.end(), RMask.begin());
21367  } else {
21368  if (RHS.getOpcode() != ISD::UNDEF)
21369  C = RHS;
21370  for (unsigned i = 0; i != NumElts; ++i)
21371  RMask[i] = i;
21372  }
21373 
21374  // Check that the shuffles are both shuffling the same vectors.
21375  if (!(A == C && B == D) && !(A == D && B == C))
21376  return false;
21377 
21378  // If everything is UNDEF then bail out: it would be better to fold to UNDEF.
21379  if (!A.getNode() && !B.getNode())
21380  return false;
21381 
21382  // If A and B occur in reverse order in RHS, then "swap" them (which means
21383  // rewriting the mask).
21384  if (A != C)
21385  CommuteVectorShuffleMask(RMask, NumElts);
21386 
21387  // At this point LHS and RHS are equivalent to
21388  // LHS = VECTOR_SHUFFLE A, B, LMask
21389  // RHS = VECTOR_SHUFFLE A, B, RMask
21390  // Check that the masks correspond to performing a horizontal operation.
21391  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
21392  for (unsigned i = 0; i != NumLaneElts; ++i) {
21393  int LIdx = LMask[i+l], RIdx = RMask[i+l];
21394 
21395  // Ignore any UNDEF components.
21396  if (LIdx < 0 || RIdx < 0 ||
21397  (!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
21398  (!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
21399  continue;
21400 
21401  // Check that successive elements are being operated on. If not, this is
21402  // not a horizontal operation.
21403  unsigned Src = (i/HalfLaneElts); // each lane is split between srcs
21404  int Index = 2*(i%HalfLaneElts) + NumElts*Src + l;
21405  if (!(LIdx == Index && RIdx == Index + 1) &&
21406  !(IsCommutative && LIdx == Index + 1 && RIdx == Index))
21407  return false;
21408  }
21409  }
21410 
21411  LHS = A.getNode() ? A : B; // If A is 'UNDEF', use B for it.
21412  RHS = B.getNode() ? B : A; // If B is 'UNDEF', use A for it.
21413  return true;
21414 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
static void CommuteVectorShuffleMask(SmallVectorImpl< int > &Mask, unsigned NumElems)
iterator end() const
end - Get an iterator to the end of the array.
Definition: ArrayRef.h:98
unsigned getSizeInBits() const
#define false
Definition: ConvertUTF.c:65
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
SDNode * getNode() const
get the SDNode which holds the desired result
#define true
Definition: ConvertUTF.c:66
assert(Globals.size() > 1)
* if(!EatIfPresent(lltok::kw_thread_local)) return false
unsigned getVectorNumElements() const
const SDValue & getOperand(unsigned i) const
iterator begin() const
begin - Get an iterator to the start of the array.
Definition: ArrayRef.h:97
unsigned getOpcode() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
static bool isINSERT64x4Mask ( ArrayRef< int >  Mask,
MVT  VT,
unsigned int *  Imm 
)
static

Definition at line 4255 of file X86ISelLowering.cpp.

4255  {
4256  if (!VT.is512BitVector())
4257  return false;
4258 
4259  unsigned NumElts = VT.getVectorNumElements();
4260  unsigned HalfSize = NumElts/2;
4261  if (isSequentialOrUndefInRange(Mask, 0, HalfSize, 0)) {
4262  if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, NumElts)) {
4263  *Imm = 1;
4264  return true;
4265  }
4266  }
4267  if (isSequentialOrUndefInRange(Mask, 0, HalfSize, NumElts)) {
4268  if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, HalfSize)) {
4269  *Imm = 0;
4270  return true;
4271  }
4272  }
4273  return false;
4274 }
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low)
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
unsigned getVectorNumElements() const
static bool isINSERTPSMask ( ArrayRef< int >  Mask,
MVT  VT 
)
static

isINSERTPSMask - Return true if the specified VECTOR_SHUFFLE operand specifies a shuffle of elements that is suitable for input to INSERTPS. i. e: If all but one element come from the same vector.

Definition at line 4006 of file X86ISelLowering.cpp.

4006  {
4007  // TODO: Deal with AVX's VINSERTPS
4008  if (!VT.is128BitVector() || (VT != MVT::v4f32 && VT != MVT::v4i32))
4009  return false;
4010 
4011  unsigned CorrectPosV1 = 0;
4012  unsigned CorrectPosV2 = 0;
4013  for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i) {
4014  if (Mask[i] == -1) {
4015  ++CorrectPosV1;
4016  ++CorrectPosV2;
4017  continue;
4018  }
4019 
4020  if (Mask[i] == i)
4021  ++CorrectPosV1;
4022  else if (Mask[i] == i + 4)
4023  ++CorrectPosV2;
4024  }
4025 
4026  if (CorrectPosV1 == 3 || CorrectPosV2 == 3)
4027  // We have 3 elements (undefs count as elements from any vector) from one
4028  // vector, and one from another.
4029  return true;
4030 
4031  return false;
4032 }
unsigned getVectorNumElements() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
static bool isMOVDDUPMask ( ArrayRef< int >  Mask,
MVT  VT 
)
static

isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand specifies a shuffle of elements that is suitable for input to 128-bit version of MOVDDUP.

Definition at line 4525 of file X86ISelLowering.cpp.

4525  {
4526  if (!VT.is128BitVector())
4527  return false;
4528 
4529  unsigned e = VT.getVectorNumElements() / 2;
4530  for (unsigned i = 0; i != e; ++i)
4531  if (!isUndefOrEqual(Mask[i], i))
4532  return false;
4533  for (unsigned i = 0; i != e; ++i)
4534  if (!isUndefOrEqual(Mask[e+i], i))
4535  return false;
4536  return true;
4537 }
static bool isUndefOrEqual(int Val, int CmpVal)
unsigned getVectorNumElements() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
static bool isMOVDDUPYMask ( ArrayRef< int >  Mask,
MVT  VT,
bool  HasFp256 
)
static

isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand specifies a shuffle of elements that is suitable for input to 256-bit version of MOVDDUP.

Definition at line 4505 of file X86ISelLowering.cpp.

4505  {
4506  if (!HasFp256 || !VT.is256BitVector())
4507  return false;
4508 
4509  unsigned NumElts = VT.getVectorNumElements();
4510  if (NumElts != 4)
4511  return false;
4512 
4513  for (unsigned i = 0; i != NumElts/2; ++i)
4514  if (!isUndefOrEqual(Mask[i], 0))
4515  return false;
4516  for (unsigned i = NumElts/2; i != NumElts; ++i)
4517  if (!isUndefOrEqual(Mask[i], NumElts/2))
4518  return false;
4519  return true;
4520 }
static bool isUndefOrEqual(int Val, int CmpVal)
unsigned getVectorNumElements() const
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
static bool isMOVHLPS_v_undef_Mask ( ArrayRef< int >  Mask,
MVT  VT 
)
static

isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, <2, 3, 2, 3>

Definition at line 3944 of file X86ISelLowering.cpp.

3944  {
3945  if (!VT.is128BitVector())
3946  return false;
3947 
3948  unsigned NumElems = VT.getVectorNumElements();
3949 
3950  if (NumElems != 4)
3951  return false;
3952 
3953  return isUndefOrEqual(Mask[0], 2) &&
3954  isUndefOrEqual(Mask[1], 3) &&
3955  isUndefOrEqual(Mask[2], 2) &&
3956  isUndefOrEqual(Mask[3], 3);
3957 }
static bool isUndefOrEqual(int Val, int CmpVal)
unsigned getVectorNumElements() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
static bool isMOVHLPSMask ( ArrayRef< int >  Mask,
MVT  VT 
)
static

isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand specifies a shuffle of elements that is suitable for input to MOVHLPS.

Definition at line 3925 of file X86ISelLowering.cpp.

3925  {
3926  if (!VT.is128BitVector())
3927  return false;
3928 
3929  unsigned NumElems = VT.getVectorNumElements();
3930 
3931  if (NumElems != 4)
3932  return false;
3933 
3934  // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
3935  return isUndefOrEqual(Mask[0], 6) &&
3936  isUndefOrEqual(Mask[1], 7) &&
3937  isUndefOrEqual(Mask[2], 2) &&
3938  isUndefOrEqual(Mask[3], 3);
3939 }
static bool isUndefOrEqual(int Val, int CmpVal)
unsigned getVectorNumElements() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
static bool isMOVLHPSMask ( ArrayRef< int >  Mask,
MVT  VT 
)
static

isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand specifies a shuffle of elements that is suitable for input to MOVLHPS.

Definition at line 3983 of file X86ISelLowering.cpp.

3983  {
3984  if (!VT.is128BitVector())
3985  return false;
3986 
3987  unsigned NumElems = VT.getVectorNumElements();
3988 
3989  if (NumElems != 2 && NumElems != 4)
3990  return false;
3991 
3992  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
3993  if (!isUndefOrEqual(Mask[i], i))
3994  return false;
3995 
3996  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
3997  if (!isUndefOrEqual(Mask[i + e], i + NumElems))
3998  return false;
3999 
4000  return true;
4001 }
static bool isUndefOrEqual(int Val, int CmpVal)
unsigned getVectorNumElements() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
static bool isMOVLMask ( ArrayRef< int >  Mask,
EVT  VT 
)
static

isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand specifies a shuffle of elements that is suitable for input to MOVSS, MOVSD, and MOVD, i.e. setting the lowest element.

Definition at line 4279 of file X86ISelLowering.cpp.

4279  {
4280  if (VT.getVectorElementType().getSizeInBits() < 32)
4281  return false;
4282  if (!VT.is128BitVector())
4283  return false;
4284 
4285  unsigned NumElts = VT.getVectorNumElements();
4286 
4287  if (!isUndefOrEqual(Mask[0], NumElts))
4288  return false;
4289 
4290  for (unsigned i = 1; i != NumElts; ++i)
4291  if (!isUndefOrEqual(Mask[i], i))
4292  return false;
4293 
4294  return true;
4295 }
static bool isUndefOrEqual(int Val, int CmpVal)
EVT getVectorElementType() const
Definition: ValueTypes.h:217
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:136
unsigned getVectorNumElements() const
Definition: ValueTypes.h:226
static bool isMOVLPMask ( ArrayRef< int >  Mask,
MVT  VT 
)
static

isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.

Definition at line 3961 of file X86ISelLowering.cpp.

3961  {
3962  if (!VT.is128BitVector())
3963  return false;
3964 
3965  unsigned NumElems = VT.getVectorNumElements();
3966 
3967  if (NumElems != 2 && NumElems != 4)
3968  return false;
3969 
3970  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
3971  if (!isUndefOrEqual(Mask[i], i + NumElems))
3972  return false;
3973 
3974  for (unsigned i = NumElems/2, e = NumElems; i != e; ++i)
3975  if (!isUndefOrEqual(Mask[i], i))
3976  return false;
3977 
3978  return true;
3979 }
static bool isUndefOrEqual(int Val, int CmpVal)
unsigned getVectorNumElements() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
static bool isMOVSHDUPMask ( ArrayRef< int >  Mask,
MVT  VT,
const X86Subtarget Subtarget 
)
static

isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand specifies a shuffle of elements that is suitable for input to MOVSHDUP. Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7>

Definition at line 4457 of file X86ISelLowering.cpp.

4458  {
4459  if (!Subtarget->hasSSE3())
4460  return false;
4461 
4462  unsigned NumElems = VT.getVectorNumElements();
4463 
4464  if ((VT.is128BitVector() && NumElems != 4) ||
4465  (VT.is256BitVector() && NumElems != 8) ||
4466  (VT.is512BitVector() && NumElems != 16))
4467  return false;
4468 
4469  // "i+1" is the value the indexed mask element must have
4470  for (unsigned i = 0; i != NumElems; i += 2)
4471  if (!isUndefOrEqual(Mask[i], i+1) ||
4472  !isUndefOrEqual(Mask[i+1], i+1))
4473  return false;
4474 
4475  return true;
4476 }
bool hasSSE3() const
Definition: X86Subtarget.h:313
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
static bool isUndefOrEqual(int Val, int CmpVal)
unsigned getVectorNumElements() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
static bool isMOVSLDUPMask ( ArrayRef< int >  Mask,
MVT  VT,
const X86Subtarget Subtarget 
)
static

isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand specifies a shuffle of elements that is suitable for input to MOVSLDUP. Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6>

Definition at line 4481 of file X86ISelLowering.cpp.

4482  {
4483  if (!Subtarget->hasSSE3())
4484  return false;
4485 
4486  unsigned NumElems = VT.getVectorNumElements();
4487 
4488  if ((VT.is128BitVector() && NumElems != 4) ||
4489  (VT.is256BitVector() && NumElems != 8) ||
4490  (VT.is512BitVector() && NumElems != 16))
4491  return false;
4492 
4493  // "i" is the value the indexed mask element must have
4494  for (unsigned i = 0; i != NumElems; i += 2)
4495  if (!isUndefOrEqual(Mask[i], i) ||
4496  !isUndefOrEqual(Mask[i+1], i))
4497  return false;
4498 
4499  return true;
4500 }
bool hasSSE3() const
Definition: X86Subtarget.h:313
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
static bool isUndefOrEqual(int Val, int CmpVal)
unsigned getVectorNumElements() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
static bool isNoopShuffleMask ( ArrayRef< int >  Mask)
static

Tiny helper function to identify a no-op mask.

This is a somewhat boring predicate function. It checks whether the mask array input, which is assumed to be a single-input shuffle mask of the kind used by the X86 shuffle instructions (not a fully general ShuffleVectorSDNode mask) requires any shuffles to occur. Both undef and an in-place shuffle are 'no-op's.

Definition at line 6898 of file X86ISelLowering.cpp.

6898  {
6899  for (int i = 0, Size = Mask.size(); i < Size; ++i)
6900  if (Mask[i] != -1 && Mask[i] != i)
6901  return false;
6902  return true;
6903 }
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:109
static bool isPALIGNRMask ( ArrayRef< int >  Mask,
MVT  VT,
const X86Subtarget Subtarget 
)
static

isPALIGNRMask - Return true if the node specifies a shuffle of elements that is suitable for input to PALIGNR.

Definition at line 3776 of file X86ISelLowering.cpp.

3777  {
3778  if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) ||
3779  (VT.is256BitVector() && !Subtarget->hasInt256()))
3780  return false;
3781 
3782  unsigned NumElts = VT.getVectorNumElements();
3783  unsigned NumLanes = VT.is512BitVector() ? 1: VT.getSizeInBits()/128;
3784  unsigned NumLaneElts = NumElts/NumLanes;
3785 
3786  // Do not handle 64-bit element shuffles with palignr.
3787  if (NumLaneElts == 2)
3788  return false;
3789 
3790  for (unsigned l = 0; l != NumElts; l+=NumLaneElts) {
3791  unsigned i;
3792  for (i = 0; i != NumLaneElts; ++i) {
3793  if (Mask[i+l] >= 0)
3794  break;
3795  }
3796 
3797  // Lane is all undef, go to next lane
3798  if (i == NumLaneElts)
3799  continue;
3800 
3801  int Start = Mask[i+l];
3802 
3803  // Make sure its in this lane in one of the sources
3804  if (!isUndefOrInRange(Start, l, l+NumLaneElts) &&
3805  !isUndefOrInRange(Start, l+NumElts, l+NumElts+NumLaneElts))
3806  return false;
3807 
3808  // If not lane 0, then we must match lane 0
3809  if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Start, Mask[i]+l))
3810  return false;
3811 
3812  // Correct second source to be contiguous with first source
3813  if (Start >= (int)NumElts)
3814  Start -= NumElts - NumLaneElts;
3815 
3816  // Make sure we're shifting in the right direction.
3817  if (Start <= (int)(i+l))
3818  return false;
3819 
3820  Start -= i;
3821 
3822  // Check the rest of the elements to see if they are consecutive.
3823  for (++i; i != NumLaneElts; ++i) {
3824  int Idx = Mask[i+l];
3825 
3826  // Make sure its in this lane
3827  if (!isUndefOrInRange(Idx, l, l+NumLaneElts) &&
3828  !isUndefOrInRange(Idx, l+NumElts, l+NumElts+NumLaneElts))
3829  return false;
3830 
3831  // If not lane 0, then we must match lane 0
3832  if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Idx, Mask[i]+l))
3833  return false;
3834 
3835  if (Idx >= (int)NumElts)
3836  Idx -= NumElts - NumLaneElts;
3837 
3838  if (!isUndefOrEqual(Idx, Start+i))
3839  return false;
3840 
3841  }
3842  }
3843 
3844  return true;
3845 }
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
static bool isUndefOrEqual(int Val, int CmpVal)
unsigned getSizeInBits() const
static bool isUndefOrInRange(int Val, int Low, int Hi)
bool hasSSSE3() const
Definition: X86Subtarget.h:314
unsigned getVectorNumElements() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
bool hasInt256() const
Definition: X86Subtarget.h:321
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
static bool isPermImmMask ( ArrayRef< int >  Mask,
MVT  VT,
unsigned &  Imm8 
)
static

Definition at line 4357 of file X86ISelLowering.cpp.

4357  {
4358  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
4359  if (EltSize < 32)
4360  return false;
4361 
4362  unsigned NumElts = VT.getVectorNumElements();
4363  Imm8 = 0;
4364  if (VT.is128BitVector() || (VT.is256BitVector() && EltSize == 64)) {
4365  for (unsigned i = 0; i != NumElts; ++i) {
4366  if (Mask[i] < 0)
4367  continue;
4368  Imm8 |= Mask[i] << (i*2);
4369  }
4370  return true;
4371  }
4372 
4373  unsigned LaneSize = 4;
4374  SmallVector<int, 4> MaskVal(LaneSize, -1);
4375 
4376  for (unsigned l = 0; l != NumElts; l += LaneSize) {
4377  for (unsigned i = 0; i != LaneSize; ++i) {
4378  if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
4379  return false;
4380  if (Mask[i+l] < 0)
4381  continue;
4382  if (MaskVal[i] < 0) {
4383  MaskVal[i] = Mask[i+l] - l;
4384  Imm8 |= MaskVal[i] << (i*2);
4385  continue;
4386  }
4387  if (Mask[i+l] != (signed)(MaskVal[i]+l))
4388  return false;
4389  }
4390  }
4391  return true;
4392 }
unsigned getSizeInBits() const
static bool isUndefOrInRange(int Val, int Low, int Hi)
unsigned getVectorNumElements() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
MVT getVectorElementType() const
static bool isPSHUFDMask ( ArrayRef< int >  Mask,
MVT  VT 
)
static

isPSHUFDMask - Return true if the node specifies a shuffle of elements that is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference the second operand.

Definition at line 3708 of file X86ISelLowering.cpp.

3708  {
3709  if (VT == MVT::v4f32 || VT == MVT::v4i32 )
3710  return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
3711  if (VT == MVT::v2f64 || VT == MVT::v2i64)
3712  return (Mask[0] < 2 && Mask[1] < 2);
3713  return false;
3714 }
static bool isPSHUFHWMask ( ArrayRef< int >  Mask,
MVT  VT,
bool  HasInt256 
)
static

isPSHUFHWMask - Return true if the node specifies a shuffle of elements that is suitable for input to PSHUFHW.

Definition at line 3718 of file X86ISelLowering.cpp.

3718  {
3719  if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
3720  return false;
3721 
3722  // Lower quadword copied in order or undef.
3723  if (!isSequentialOrUndefInRange(Mask, 0, 4, 0))
3724  return false;
3725 
3726  // Upper quadword shuffled.
3727  for (unsigned i = 4; i != 8; ++i)
3728  if (!isUndefOrInRange(Mask[i], 4, 8))
3729  return false;
3730 
3731  if (VT == MVT::v16i16) {
3732  // Lower quadword copied in order or undef.
3733  if (!isSequentialOrUndefInRange(Mask, 8, 4, 8))
3734  return false;
3735 
3736  // Upper quadword shuffled.
3737  for (unsigned i = 12; i != 16; ++i)
3738  if (!isUndefOrInRange(Mask[i], 12, 16))
3739  return false;
3740  }
3741 
3742  return true;
3743 }
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low)
static bool isUndefOrInRange(int Val, int Low, int Hi)
static bool isPSHUFLWMask ( ArrayRef< int >  Mask,
MVT  VT,
bool  HasInt256 
)
static

isPSHUFLWMask - Return true if the node specifies a shuffle of elements that is suitable for input to PSHUFLW.

Definition at line 3747 of file X86ISelLowering.cpp.

3747  {
3748  if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
3749  return false;
3750 
3751  // Upper quadword copied in order.
3752  if (!isSequentialOrUndefInRange(Mask, 4, 4, 4))
3753  return false;
3754 
3755  // Lower quadword shuffled.
3756  for (unsigned i = 0; i != 4; ++i)
3757  if (!isUndefOrInRange(Mask[i], 0, 4))
3758  return false;
3759 
3760  if (VT == MVT::v16i16) {
3761  // Upper quadword copied in order.
3762  if (!isSequentialOrUndefInRange(Mask, 12, 4, 12))
3763  return false;
3764 
3765  // Lower quadword shuffled.
3766  for (unsigned i = 8; i != 12; ++i)
3767  if (!isUndefOrInRange(Mask[i], 8, 12))
3768  return false;
3769  }
3770 
3771  return true;
3772 }
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low)
static bool isUndefOrInRange(int Val, int Low, int Hi)
static bool isScalarLoadToVector ( SDNode N,
LoadSDNode **  LD = nullptr 
)
static

isScalarLoadToVector - Returns true if the node is a scalar load that is promoted to a vector. It also returns the LoadSDNode by reference if required.

Definition at line 4791 of file X86ISelLowering.cpp.

4791  {
4792  if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
4793  return false;
4794  N = N->getOperand(0).getNode();
4795  if (!ISD::isNON_EXTLoad(N))
4796  return false;
4797  if (LD)
4798  *LD = cast<LoadSDNode>(N);
4799  return true;
4800 }
unsigned getOpcode() const
const SDValue & getOperand(unsigned Num) const
SDNode * getNode() const
get the SDNode which holds the desired result
#define N
bool isNON_EXTLoad(const SDNode *N)
static bool isSequentialOrUndefInRange ( ArrayRef< int >  Mask,
unsigned  Pos,
unsigned  Size,
int  Low 
)
static

isSequentialOrUndefInRange - Return true if every element in Mask, beginning from position Pos and ending in Pos+Size, falls within the specified sequential range [Low, Low+Size), or is undef.

Definition at line 3697 of file X86ISelLowering.cpp.

3698  {
3699  for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
3700  if (!isUndefOrEqual(Mask[i], Low))
3701  return false;
3702  return true;
3703 }
static bool isUndefOrEqual(int Val, int CmpVal)
static bool isShuffleHigh128VectorInsertLow ( ShuffleVectorSDNode SVOp)
static

isShuffleHigh128VectorInsertLow - Checks whether the shuffle node is the same as extracting the high 128-bit part of 256-bit vector and then inserting the result into the low part of a new 256-bit vector

Definition at line 18340 of file X86ISelLowering.cpp.

18340  {
18341  EVT VT = SVOp->getValueType(0);
18342  unsigned NumElems = VT.getVectorNumElements();
18343 
18344  // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
18345  for (unsigned i = 0, j = NumElems/2; i != NumElems/2; ++i, ++j)
18346  if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
18347  SVOp->getMaskElt(j) >= 0)
18348  return false;
18349 
18350  return true;
18351 }
static bool isUndefOrEqual(int Val, int CmpVal)
EVT getValueType(unsigned ResNo) const
int getMaskElt(unsigned Idx) const
unsigned getVectorNumElements() const
Definition: ValueTypes.h:226
static bool isShuffleLow128VectorInsertHigh ( ShuffleVectorSDNode SVOp)
static

isShuffleLow128VectorInsertHigh - Checks whether the shuffle node is the same as extracting the low 128-bit part of 256-bit vector and then inserting the result into the high part of a new 256-bit vector

Definition at line 18356 of file X86ISelLowering.cpp.

18356  {
18357  EVT VT = SVOp->getValueType(0);
18358  unsigned NumElems = VT.getVectorNumElements();
18359 
18360  // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
18361  for (unsigned i = NumElems/2, j = 0; i != NumElems; ++i, ++j)
18362  if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
18363  SVOp->getMaskElt(j) >= 0)
18364  return false;
18365 
18366  return true;
18367 }
static bool isUndefOrEqual(int Val, int CmpVal)
EVT getValueType(unsigned ResNo) const
int getMaskElt(unsigned Idx) const
unsigned getVectorNumElements() const
Definition: ValueTypes.h:226
static bool isShuffleMaskConsecutive ( ShuffleVectorSDNode SVOp,
unsigned  MaskI,
unsigned  MaskE,
unsigned  OpIdx,
unsigned  NumElems,
unsigned &  OpNum 
)
static

isShuffleMaskConsecutive - Check if the shuffle mask indices [MaskI, MaskE) correspond consecutively to elements from one of the vector operands, starting from its index OpIdx. Also tell OpNum which source vector operand.

Definition at line 5280 of file X86ISelLowering.cpp.

5282  {
5283  bool SeenV1 = false;
5284  bool SeenV2 = false;
5285 
5286  for (unsigned i = MaskI; i != MaskE; ++i, ++OpIdx) {
5287  int Idx = SVOp->getMaskElt(i);
 5288  // Ignore undef indices
5289  if (Idx < 0)
5290  continue;
5291 
5292  if (Idx < (int)NumElems)
5293  SeenV1 = true;
5294  else
5295  SeenV2 = true;
5296 
5297  // Only accept consecutive elements from the same vector
5298  if ((Idx % NumElems != OpIdx) || (SeenV1 && SeenV2))
5299  return false;
5300  }
5301 
5302  OpNum = SeenV1 ? 0 : 1;
5303  return true;
5304 }
int getMaskElt(unsigned Idx) const
static bool isSHUFPMask ( ArrayRef< int >  Mask,
MVT  VT,
bool  Commuted = false 
)
static

isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand specifies a shuffle of elements that is suitable for input to 128/256-bit SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be reverse of what x86 shuffles want.

Definition at line 3866 of file X86ISelLowering.cpp.

3866  {
3867 
3868  unsigned NumElems = VT.getVectorNumElements();
3869  unsigned NumLanes = VT.getSizeInBits()/128;
3870  unsigned NumLaneElems = NumElems/NumLanes;
3871 
3872  if (NumLaneElems != 2 && NumLaneElems != 4)
3873  return false;
3874 
3875  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3876  bool symetricMaskRequired =
3877  (VT.getSizeInBits() >= 256) && (EltSize == 32);
3878 
3879  // VSHUFPSY divides the resulting vector into 4 chunks.
 3880  // The sources are also split into 4 chunks, and each destination
3881  // chunk must come from a different source chunk.
3882  //
3883  // SRC1 => X7 X6 X5 X4 X3 X2 X1 X0
 3884  // SRC2 => Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
3885  //
3886  // DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4,
3887  // Y3..Y0, Y3..Y0, X3..X0, X3..X0
3888  //
3889  // VSHUFPDY divides the resulting vector into 4 chunks.
 3890  // The sources are also split into 4 chunks, and each destination
3891  // chunk must come from a different source chunk.
3892  //
3893  // SRC1 => X3 X2 X1 X0
3894  // SRC2 => Y3 Y2 Y1 Y0
3895  //
3896  // DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0
3897  //
3898  SmallVector<int, 4> MaskVal(NumLaneElems, -1);
3899  unsigned HalfLaneElems = NumLaneElems/2;
3900  for (unsigned l = 0; l != NumElems; l += NumLaneElems) {
3901  for (unsigned i = 0; i != NumLaneElems; ++i) {
3902  int Idx = Mask[i+l];
3903  unsigned RngStart = l + ((Commuted == (i<HalfLaneElems)) ? NumElems : 0);
3904  if (!isUndefOrInRange(Idx, RngStart, RngStart+NumLaneElems))
3905  return false;
3906  // For VSHUFPSY, the mask of the second half must be the same as the
3907  // first but with the appropriate offsets. This works in the same way as
3908  // VPERMILPS works with masks.
3909  if (!symetricMaskRequired || Idx < 0)
3910  continue;
3911  if (MaskVal[i] < 0) {
3912  MaskVal[i] = Idx - l;
3913  continue;
3914  }
3915  if ((signed)(Idx - l) != MaskVal[i])
3916  return false;
3917  }
3918  }
3919 
3920  return true;
3921 }
unsigned getSizeInBits() const
static bool isUndefOrInRange(int Val, int Low, int Hi)
unsigned getVectorNumElements() const
MVT getVectorElementType() const
static bool isSingleInputShuffleMask ( ArrayRef< int >  Mask)
static

Helper function to classify a mask as a single-input mask.

This isn't a generic single-input test because in the vector shuffle lowering we canonicalize single inputs to be the first input operand. This means we can more quickly test for a single input by only checking whether an input from the second operand exists. We also assume that the size of mask corresponds to the size of the input vectors which isn't true in the fully general case.

Definition at line 6913 of file X86ISelLowering.cpp.

6913  {
6914  for (int M : Mask)
6915  if (M >= (int)Mask.size())
6916  return false;
6917  return true;
6918 }
static bool IsTailCallConvention ( CallingConv::ID  CC)
static

IsTailCallConvention - Return true if the calling convention is one that supports tail call optimization.

Definition at line 2172 of file X86ISelLowering.cpp.

static bool isTargetShuffle ( unsigned  Opcode)
static

Definition at line 3378 of file X86ISelLowering.cpp.

3378  {
3379  switch(Opcode) {
3380  default: return false;
3381  case X86ISD::PSHUFD:
3382  case X86ISD::PSHUFHW:
3383  case X86ISD::PSHUFLW:
3384  case X86ISD::SHUFP:
3385  case X86ISD::PALIGNR:
3386  case X86ISD::MOVLHPS:
3387  case X86ISD::MOVLHPD:
3388  case X86ISD::MOVHLPS:
3389  case X86ISD::MOVLPS:
3390  case X86ISD::MOVLPD:
3391  case X86ISD::MOVSHDUP:
3392  case X86ISD::MOVSLDUP:
3393  case X86ISD::MOVDDUP:
3394  case X86ISD::MOVSS:
3395  case X86ISD::MOVSD:
3396  case X86ISD::UNPCKL:
3397  case X86ISD::UNPCKH:
3398  case X86ISD::VPERMILP:
3399  case X86ISD::VPERM2X128:
3400  case X86ISD::VPERMI:
3401  return true;
3402  }
3403 }
static bool isTruncWithZeroHighBitsInput ( SDValue  V,
SelectionDAG DAG 
)
static

Definition at line 12506 of file X86ISelLowering.cpp.

12506  {
12507  if (V.getOpcode() != ISD::TRUNCATE)
12508  return false;
12509 
12510  SDValue VOp0 = V.getOperand(0);
12511  unsigned InBits = VOp0.getValueSizeInBits();
12512  unsigned Bits = V.getValueSizeInBits();
12513  return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits));
12514 }
unsigned getValueSizeInBits() const
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:368
static bool isUndefOrEqual ( int  Val,
int  CmpVal 
)
static

isUndefOrEqual - Val is either less than zero (undef) or equal to the specified value.

Definition at line 3690 of file X86ISelLowering.cpp.

3690  {
3691  return (Val < 0 || Val == CmpVal);
3692 }
static bool isUndefOrInRange ( int  Val,
int  Low,
int  Hi 
)
static

isUndefOrInRange - Return true if Val is undef or if its value falls within the specified range [Low, Hi).

Definition at line 3684 of file X86ISelLowering.cpp.

3684  {
3685  return (Val < 0) || (Val >= Low && Val < Hi);
3686 }
static bool isUNPCKH_v_undef_Mask ( ArrayRef< int >  Mask,
MVT  VT,
bool  HasInt256 
)
static

isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, <2, 2, 3, 3>

Definition at line 4222 of file X86ISelLowering.cpp.

4222  {
4223  unsigned NumElts = VT.getVectorNumElements();
4224 
4225  if (VT.is512BitVector())
4226  return false;
4227 
4228  assert((VT.is128BitVector() || VT.is256BitVector()) &&
4229  "Unsupported vector type for unpckh");
4230 
4231  if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
4232  (!HasInt256 || (NumElts != 16 && NumElts != 32)))
4233  return false;
4234 
4235  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
4236  // independently on 128-bit lanes.
4237  unsigned NumLanes = VT.getSizeInBits()/128;
4238  unsigned NumLaneElts = NumElts/NumLanes;
4239 
4240  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
4241  for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
4242  int BitI = Mask[l+i];
4243  int BitI1 = Mask[l+i+1];
4244  if (!isUndefOrEqual(BitI, j))
4245  return false;
4246  if (!isUndefOrEqual(BitI1, j))
4247  return false;
4248  }
4249  }
4250  return true;
4251 }
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
static bool isUndefOrEqual(int Val, int CmpVal)
unsigned getSizeInBits() const
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
static bool isUNPCKHMask ( ArrayRef< int >  Mask,
MVT  VT,
bool  HasInt256,
bool  V2IsSplat = false 
)
static

isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand specifies a shuffle of elements that is suitable for input to UNPCKH.

Definition at line 4131 of file X86ISelLowering.cpp.

4132  {
4133  assert(VT.getSizeInBits() >= 128 &&
4134  "Unsupported vector type for unpckh");
4135 
4136  // AVX defines UNPCK* to operate independently on 128-bit lanes.
4137  unsigned NumLanes;
4138  unsigned NumOf256BitLanes;
4139  unsigned NumElts = VT.getVectorNumElements();
4140  if (VT.is256BitVector()) {
4141  if (NumElts != 4 && NumElts != 8 &&
4142  (!HasInt256 || (NumElts != 16 && NumElts != 32)))
4143  return false;
4144  NumLanes = 2;
4145  NumOf256BitLanes = 1;
4146  } else if (VT.is512BitVector()) {
4147  assert(VT.getScalarType().getSizeInBits() >= 32 &&
4148  "Unsupported vector type for unpckh");
4149  NumLanes = 2;
4150  NumOf256BitLanes = 2;
4151  } else {
4152  NumLanes = 1;
4153  NumOf256BitLanes = 1;
4154  }
4155 
4156  unsigned NumEltsInStride = NumElts/NumOf256BitLanes;
4157  unsigned NumLaneElts = NumEltsInStride/NumLanes;
4158 
4159  for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) {
4160  for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) {
4161  for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
4162  int BitI = Mask[l256*NumEltsInStride+l+i];
4163  int BitI1 = Mask[l256*NumEltsInStride+l+i+1];
4164  if (!isUndefOrEqual(BitI, j+l256*NumElts))
4165  return false;
4166  if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts))
4167  return false;
4168  if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride))
4169  return false;
4170  }
4171  }
4172  }
4173  return true;
4174 }
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
static bool isUndefOrEqual(int Val, int CmpVal)
unsigned getSizeInBits() const
MVT getScalarType() const
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
static bool isUNPCKL_v_undef_Mask ( ArrayRef< int >  Mask,
MVT  VT,
bool  HasInt256 
)
static

isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, <0, 0, 1, 1>

Definition at line 4179 of file X86ISelLowering.cpp.

4179  {
4180  unsigned NumElts = VT.getVectorNumElements();
4181  bool Is256BitVec = VT.is256BitVector();
4182 
4183  if (VT.is512BitVector())
4184  return false;
4185  assert((VT.is128BitVector() || VT.is256BitVector()) &&
4186  "Unsupported vector type for unpckh");
4187 
4188  if (Is256BitVec && NumElts != 4 && NumElts != 8 &&
4189  (!HasInt256 || (NumElts != 16 && NumElts != 32)))
4190  return false;
4191 
4192  // For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern
4193  // FIXME: Need a better way to get rid of this, there's no latency difference
4194  // between UNPCKLPD and MOVDDUP, the later should always be checked first and
4195  // the former later. We should also remove the "_undef" special mask.
4196  if (NumElts == 4 && Is256BitVec)
4197  return false;
4198 
4199  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
4200  // independently on 128-bit lanes.
4201  unsigned NumLanes = VT.getSizeInBits()/128;
4202  unsigned NumLaneElts = NumElts/NumLanes;
4203 
4204  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
4205  for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
4206  int BitI = Mask[l+i];
4207  int BitI1 = Mask[l+i+1];
4208 
4209  if (!isUndefOrEqual(BitI, j))
4210  return false;
4211  if (!isUndefOrEqual(BitI1, j))
4212  return false;
4213  }
4214  }
4215 
4216  return true;
4217 }
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
static bool isUndefOrEqual(int Val, int CmpVal)
unsigned getSizeInBits() const
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
static bool isUNPCKLMask ( ArrayRef< int >  Mask,
MVT  VT,
bool  HasInt256,
bool  V2IsSplat = false 
)
static

isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand specifies a shuffle of elements that is suitable for input to UNPCKL.

Definition at line 4083 of file X86ISelLowering.cpp.

4084  {
4085 
4086  assert(VT.getSizeInBits() >= 128 &&
4087  "Unsupported vector type for unpckl");
4088 
4089  // AVX defines UNPCK* to operate independently on 128-bit lanes.
4090  unsigned NumLanes;
4091  unsigned NumOf256BitLanes;
4092  unsigned NumElts = VT.getVectorNumElements();
4093  if (VT.is256BitVector()) {
4094  if (NumElts != 4 && NumElts != 8 &&
4095  (!HasInt256 || (NumElts != 16 && NumElts != 32)))
4096  return false;
4097  NumLanes = 2;
4098  NumOf256BitLanes = 1;
4099  } else if (VT.is512BitVector()) {
4100  assert(VT.getScalarType().getSizeInBits() >= 32 &&
4101  "Unsupported vector type for unpckh");
4102  NumLanes = 2;
4103  NumOf256BitLanes = 2;
4104  } else {
4105  NumLanes = 1;
4106  NumOf256BitLanes = 1;
4107  }
4108 
4109  unsigned NumEltsInStride = NumElts/NumOf256BitLanes;
4110  unsigned NumLaneElts = NumEltsInStride/NumLanes;
4111 
4112  for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) {
4113  for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) {
4114  for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
4115  int BitI = Mask[l256*NumEltsInStride+l+i];
4116  int BitI1 = Mask[l256*NumEltsInStride+l+i+1];
4117  if (!isUndefOrEqual(BitI, j+l256*NumElts))
4118  return false;
4119  if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts))
4120  return false;
4121  if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride))
4122  return false;
4123  }
4124  }
4125  }
4126  return true;
4127 }
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
static bool isUndefOrEqual(int Val, int CmpVal)
unsigned getSizeInBits() const
MVT getScalarType() const
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
static bool isVectorShift ( ShuffleVectorSDNode SVOp,
SelectionDAG DAG,
bool &  isLeft,
SDValue ShVal,
unsigned &  ShAmt 
)
static

isVectorShift - Returns true if the shuffle can be implemented as a logical left or right shift of a vector.

Definition at line 5378 of file X86ISelLowering.cpp.

5379  {
5380  // Although the logic below support any bitwidth size, there are no
5381  // shift instructions which handle more than 128-bit vectors.
5382  if (!SVOp->getSimpleValueType(0).is128BitVector())
5383  return false;
5384 
5385  if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
5386  isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt))
5387  return true;
5388 
5389  return false;
5390 }
static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt)
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
MVT getSimpleValueType(unsigned ResNo) const
static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt)
static bool isVectorShiftLeft ( ShuffleVectorSDNode SVOp,
SelectionDAG DAG,
bool &  isLeft,
SDValue ShVal,
unsigned &  ShAmt 
)
static

isVectorShiftLeft - Returns true if the shuffle can be implemented as a logical left shift of a vector.

Definition at line 5343 of file X86ISelLowering.cpp.

5344  {
 5345  unsigned NumElems =
 5346  SVOp->getSimpleValueType(0).getVectorNumElements();
 5347  unsigned NumZeros = getNumOfConsecutiveZeros(
5348  SVOp, NumElems, true /* check zeros from left */, DAG,
5349  NumElems - SVOp->getMaskElt(NumElems - 1) - 1);
5350  unsigned OpSrc;
5351 
5352  if (!NumZeros)
5353  return false;
5354 
5355  // Considering the elements in the mask that are not consecutive zeros,
5356  // check if they consecutively come from only one of the source vectors.
5357  //
5358  // 0 { A, B, X, X } = V2
5359  // / \ / /
5360  // vector_shuffle V1, V2 <X, X, 4, 5>
5361  //
5362  if (!isShuffleMaskConsecutive(SVOp,
5363  NumZeros, // Mask Start Index
5364  NumElems, // Mask End Index(exclusive)
5365  0, // Where to start looking in the src vector
5366  NumElems, // Number of elements in vector
5367  OpSrc)) // Which source operand ?
5368  return false;
5369 
5370  isLeft = true;
5371  ShAmt = NumZeros;
5372  ShVal = SVOp->getOperand(OpSrc);
5373  return true;
5374 }
const SDValue & getOperand(unsigned Num) const
int getMaskElt(unsigned Idx) const
static unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, unsigned NumElems, bool ZerosFromLeft, SelectionDAG &DAG, unsigned PreferredNum=-1U)
unsigned getVectorNumElements() const
static bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, unsigned MaskI, unsigned MaskE, unsigned OpIdx, unsigned NumElems, unsigned &OpNum)
MVT getSimpleValueType(unsigned ResNo) const
static bool isVectorShiftRight ( ShuffleVectorSDNode SVOp,
SelectionDAG DAG,
bool &  isLeft,
SDValue ShVal,
unsigned &  ShAmt 
)
static

isVectorShiftRight - Returns true if the shuffle can be implemented as a logical right shift of a vector.

Definition at line 5308 of file X86ISelLowering.cpp.

5309  {
5310  unsigned NumElems =
5312  unsigned NumZeros = getNumOfConsecutiveZeros(
5313  SVOp, NumElems, false /* check zeros from right */, DAG,
5314  SVOp->getMaskElt(0));
5315  unsigned OpSrc;
5316 
5317  if (!NumZeros)
5318  return false;
5319 
5320  // Considering the elements in the mask that are not consecutive zeros,
5321  // check if they consecutively come from only one of the source vectors.
5322  //
5323  // V1 = {X, A, B, C} 0
5324  // \ \ \ /
5325  // vector_shuffle V1, V2 <1, 2, 3, X>
5326  //
5327  if (!isShuffleMaskConsecutive(SVOp,
5328  0, // Mask Start Index
5329  NumElems-NumZeros, // Mask End Index(exclusive)
5330  NumZeros, // Where to start looking in the src vector
5331  NumElems, // Number of elements in vector
5332  OpSrc)) // Which source operand ?
5333  return false;
5334 
5335  isLeft = false;
5336  ShAmt = NumZeros;
5337  ShVal = SVOp->getOperand(OpSrc);
5338  return true;
5339 }
const SDValue & getOperand(unsigned Num) const
int getMaskElt(unsigned Idx) const
static unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, unsigned NumElems, bool ZerosFromLeft, SelectionDAG &DAG, unsigned PreferredNum=-1U)
unsigned getVectorNumElements() const
static bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, unsigned MaskI, unsigned MaskE, unsigned OpIdx, unsigned NumElems, unsigned &OpNum)
MVT getSimpleValueType(unsigned ResNo) const
static bool isVEXTRACTIndex ( SDNode N,
unsigned  vecWidth 
)
static

isVEXTRACTIndex - Return true if the specified EXTRACT_SUBVECTOR operand specifies a vector extract that is suitable for instructions that extract 128- or 256-bit vectors

Definition at line 4542 of file X86ISelLowering.cpp.

4542  {
4543  assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
4544  if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
4545  return false;
4546 
4547  // The index should be aligned on a vecWidth-bit boundary.
4548  uint64_t Index =
4549  cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
4550 
4551  MVT VT = N->getSimpleValueType(0);
4552  unsigned ElSize = VT.getVectorElementType().getSizeInBits();
4553  bool Result = (Index * ElSize) % vecWidth == 0;
4554 
4555  return Result;
4556 }
unsigned getSizeInBits() const
const SDValue & getOperand(unsigned Num) const
SDNode * getNode() const
get the SDNode which holds the desired result
assert(Globals.size() > 1)
MVT getVectorElementType() const
MVT getSimpleValueType(unsigned ResNo) const
static bool isVINSERTIndex ( SDNode N,
unsigned  vecWidth 
)
static

isVINSERTIndex - Return true if the specified INSERT_SUBVECTOR operand specifies a subvector insert that is suitable for insertion of 128- or 256-bit subvectors

Definition at line 4561 of file X86ISelLowering.cpp.

4561  {
4562  assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
4563  if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
4564  return false;
4565  // The index should be aligned on a vecWidth-bit boundary.
4566  uint64_t Index =
4567  cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
4568 
4569  MVT VT = N->getSimpleValueType(0);
4570  unsigned ElSize = VT.getVectorElementType().getSizeInBits();
4571  bool Result = (Index * ElSize) % vecWidth == 0;
4572 
4573  return Result;
4574 }
unsigned getSizeInBits() const
const SDValue & getOperand(unsigned Num) const
SDNode * getNode() const
get the SDNode which holds the desired result
assert(Globals.size() > 1)
MVT getVectorElementType() const
MVT getSimpleValueType(unsigned ResNo) const
static bool isVPERM2X128Mask ( ArrayRef< int >  Mask,
MVT  VT,
bool  HasFp256 
)
static

isVPERM2X128Mask - Match 256-bit shuffles where the elements are considered as permutations between 128-bit chunks or halves. As an example: this shuffle below: vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15> The first half comes from the second half of V1 and the second half from the second half of V2.

Definition at line 4303 of file X86ISelLowering.cpp.

4303  {
4304  if (!HasFp256 || !VT.is256BitVector())
4305  return false;
4306 
4307  // The shuffle result is divided into half A and half B. In total the two
4308  // sources have 4 halves, namely: C, D, E, F. The final values of A and
4309  // B must come from C, D, E or F.
4310  unsigned HalfSize = VT.getVectorNumElements()/2;
4311  bool MatchA = false, MatchB = false;
4312 
4313  // Check if A comes from one of C, D, E, F.
4314  for (unsigned Half = 0; Half != 4; ++Half) {
4315  if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) {
4316  MatchA = true;
4317  break;
4318  }
4319  }
4320 
4321  // Check if B comes from one of C, D, E, F.
4322  for (unsigned Half = 0; Half != 4; ++Half) {
4323  if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) {
4324  MatchB = true;
4325  break;
4326  }
4327  }
4328 
4329  return MatchA && MatchB;
4330 }
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low)
unsigned getVectorNumElements() const
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
static bool isVPERMILPMask ( ArrayRef< int >  Mask,
MVT  VT 
)
static

isVPERMILPMask - Return true if the specified VECTOR_SHUFFLE operand specifies a shuffle of elements that is suitable for input to VPERMILPD*. Note that VPERMIL mask matching is different depending on whether the underlying type is 32 or 64 bits. In VPERMILPS the high half of the mask should point to the same elements of the low half, but within the higher half of the source. In VPERMILPD the two lanes could be shuffled independently of each other with the same restriction that lanes can't be crossed. Also handles PSHUFDY.

Definition at line 4401 of file X86ISelLowering.cpp.

4401  {
4402  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
4403  if (VT.getSizeInBits() < 256 || EltSize < 32)
4404  return false;
4405  bool symetricMaskRequired = (EltSize == 32);
4406  unsigned NumElts = VT.getVectorNumElements();
4407 
4408  unsigned NumLanes = VT.getSizeInBits()/128;
4409  unsigned LaneSize = NumElts/NumLanes;
4410  // 2 or 4 elements in one lane
4411 
4412  SmallVector<int, 4> ExpectedMaskVal(LaneSize, -1);
4413  for (unsigned l = 0; l != NumElts; l += LaneSize) {
4414  for (unsigned i = 0; i != LaneSize; ++i) {
4415  if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
4416  return false;
4417  if (symetricMaskRequired) {
4418  if (ExpectedMaskVal[i] < 0 && Mask[i+l] >= 0) {
4419  ExpectedMaskVal[i] = Mask[i+l] - l;
4420  continue;
4421  }
4422  if (!isUndefOrEqual(Mask[i+l], ExpectedMaskVal[i]+l))
4423  return false;
4424  }
4425  }
4426  }
4427  return true;
4428 }
static bool isUndefOrEqual(int Val, int CmpVal)
unsigned getSizeInBits() const
static bool isUndefOrInRange(int Val, int Low, int Hi)
unsigned getVectorNumElements() const
MVT getVectorElementType() const
static bool isX86CCUnsigned ( unsigned  X86CC)
static

Return true if the condition is an unsigned comparison operation.

Definition at line 3535 of file X86ISelLowering.cpp.

3535  {
3536  switch (X86CC) {
3537  default: llvm_unreachable("Invalid integer condition!");
3538  case X86::COND_E: return true;
3539  case X86::COND_G: return false;
3540  case X86::COND_GE: return false;
3541  case X86::COND_L: return false;
3542  case X86::COND_LE: return false;
3543  case X86::COND_NE: return true;
3544  case X86::COND_B: return true;
3545  case X86::COND_A: return true;
3546  case X86::COND_BE: return true;
3547  case X86::COND_AE: return true;
3548  }
3549  llvm_unreachable("covered switch fell through?!");
3550 }
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
static bool isX86LogicalCmp ( SDValue  Op)
static

Definition at line 12481 of file X86ISelLowering.cpp.

12481  {
12482  unsigned Opc = Op.getNode()->getOpcode();
12483  if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI ||
12484  Opc == X86ISD::SAHF)
12485  return true;
12486  if (Op.getResNo() == 1 &&
12487  (Opc == X86ISD::ADD ||
12488  Opc == X86ISD::SUB ||
12489  Opc == X86ISD::ADC ||
12490  Opc == X86ISD::SBB ||
12491  Opc == X86ISD::SMUL ||
12492  Opc == X86ISD::UMUL ||
12493  Opc == X86ISD::INC ||
12494  Opc == X86ISD::DEC ||
12495  Opc == X86ISD::OR ||
12496  Opc == X86ISD::XOR ||
12497  Opc == X86ISD::AND))
12498  return true;
12499 
12500  if (Op.getResNo() == 2 && Opc == X86ISD::UMUL)
12501  return true;
12502 
12503  return false;
12504 }
unsigned getOpcode() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
SDNode * getNode() const
get the SDNode which holds the desired result
static bool isXor1OfSetCC ( SDValue  Op)
static

Definition at line 12829 of file X86ISelLowering.cpp.

12829  {
12830  if (Op.getOpcode() != ISD::XOR)
12831  return false;
12833  if (N1C && N1C->getAPIntValue() == 1) {
12834  return Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
12835  Op.getOperand(0).hasOneUse();
12836  }
12837  return false;
12838 }
bool hasOneUse() const
const APInt & getAPIntValue() const
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:265
static bool isZero ( SDValue  V)
static

isZero - Returns true if V is a constant integer zero

Definition at line 4755 of file X86ISelLowering.cpp.

4755  {
4757  return C && C->isNullValue();
4758 }
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:265
static bool isZeroShuffle ( ShuffleVectorSDNode N)
static

isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved to a zero vector. FIXME: move to dag combiner / method on ShuffleVectorSDNode

Definition at line 4858 of file X86ISelLowering.cpp.

4858  {
4859  SDValue V1 = N->getOperand(0);
4860  SDValue V2 = N->getOperand(1);
4861  unsigned NumElems = N->getValueType(0).getVectorNumElements();
4862  for (unsigned i = 0; i != NumElems; ++i) {
4863  int Idx = N->getMaskElt(i);
4864  if (Idx >= (int)NumElems) {
4865  unsigned Opc = V2.getOpcode();
4866  if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
4867  continue;
4868  if (Opc != ISD::BUILD_VECTOR ||
4869  !X86::isZeroNode(V2.getOperand(Idx-NumElems)))
4870  return false;
4871  } else if (Idx >= 0) {
4872  unsigned Opc = V1.getOpcode();
4873  if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
4874  continue;
4875  if (Opc != ISD::BUILD_VECTOR ||
4876  !X86::isZeroNode(V1.getOperand(Idx)))
4877  return false;
4878  }
4879  }
4880  return true;
4881 }
const SDValue & getOperand(unsigned Num) const
bool isBuildVectorAllZeros(const SDNode *N)
EVT getValueType(unsigned ResNo) const
int getMaskElt(unsigned Idx) const
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
SDNode * getNode() const
get the SDNode which holds the desired result
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
bool isZeroNode(SDValue Elt)
unsigned getVectorNumElements() const
Definition: ValueTypes.h:226
static SDValue lower128BitVectorShuffle ( SDValue  Op,
SDValue  V1,
SDValue  V2,
MVT  VT,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Dispatching routine to lower various 128-bit x86 vector shuffles.

This routine breaks down the specific type of 128-bit shuffle and dispatches to the lowering routines accordingly.

Definition at line 7874 of file X86ISelLowering.cpp.

7876  {
7877  switch (VT.SimpleTy) {
7878  case MVT::v2i64:
7879  return lowerV2I64VectorShuffle(Op, V1, V2, Subtarget, DAG);
7880  case MVT::v2f64:
7881  return lowerV2F64VectorShuffle(Op, V1, V2, Subtarget, DAG);
7882  case MVT::v4i32:
7883  return lowerV4I32VectorShuffle(Op, V1, V2, Subtarget, DAG);
7884  case MVT::v4f32:
7885  return lowerV4F32VectorShuffle(Op, V1, V2, Subtarget, DAG);
7886  case MVT::v8i16:
7887  return lowerV8I16VectorShuffle(Op, V1, V2, Subtarget, DAG);
7888  case MVT::v16i8:
7889  return lowerV16I8VectorShuffle(Op, V1, V2, Subtarget, DAG);
7890 
7891  default:
7892  llvm_unreachable("Unimplemented!");
7893  }
7894 }
static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG)
Handle lowering of 2-lane 64-bit floating point shuffles.
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
SimpleValueType SimpleTy
static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG)
Generic lowering of v16i8 shuffles.
static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG)
Lower 4-lane i32 vector shuffles.
static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG)
Handle lowering of 2-lane 64-bit integer shuffles.
static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG)
Generic lowering of 8-lane i16 shuffles.
static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, const X86Subtarget *Subtarget, SelectionDAG &DAG)
Lower 4-lane 32-bit floating point shuffles.
static SDValue Lower256IntArith ( SDValue  Op,
SelectionDAG DAG 
)
static

Definition at line 14993 of file X86ISelLowering.cpp.

14993  {
14994  MVT VT = Op.getSimpleValueType();
14995 
14996  assert(VT.is256BitVector() && VT.isInteger() &&
14997  "Unsupported value type for operation");
14998 
14999  unsigned NumElems = VT.getVectorNumElements();
15000  SDLoc dl(Op);
15001 
15002  // Extract the LHS vectors
15003  SDValue LHS = Op.getOperand(0);
15004  SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, dl);
15005  SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, dl);
15006 
15007  // Extract the RHS vectors
15008  SDValue RHS = Op.getOperand(1);
15009  SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, dl);
15010  SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl);
15011 
15012  MVT EltVT = VT.getVectorElementType();
15013  MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
15014 
15015  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
15016  DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1),
15017  DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
15018 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
assert(Globals.size() > 1)
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
unsigned getVectorNumElements() const
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
MVT getVectorElementType() const
static SDValue Lower256IntVSETCC ( SDValue  Op,
SelectionDAG DAG 
)
static

Definition at line 12072 of file X86ISelLowering.cpp.

12072  {
12073  MVT VT = Op.getSimpleValueType();
12074 
12075  assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC &&
12076  "Unsupported value type for operation");
12077 
12078  unsigned NumElems = VT.getVectorNumElements();
12079  SDLoc dl(Op);
12080  SDValue CC = Op.getOperand(2);
12081 
12082  // Extract the LHS vectors
12083  SDValue LHS = Op.getOperand(0);
12084  SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, dl);
12085  SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, dl);
12086 
12087  // Extract the RHS vectors
12088  SDValue RHS = Op.getOperand(1);
12089  SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, dl);
12090  SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl);
12091 
12092  // Issue the operation on the smaller types and concatenate the result back
12093  MVT EltVT = VT.getVectorElementType();
12094  MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
12095  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
12096  DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC),
12097  DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
12098 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
MVT getVectorElementType() const
static SDValue LowerADD ( SDValue  Op,
SelectionDAG DAG 
)
static

Definition at line 15020 of file X86ISelLowering.cpp.

15020  {
15022  Op.getSimpleValueType().isInteger() &&
15023  "Only handle AVX 256-bit vector integer operation");
15024  return Lower256IntArith(Op, DAG);
15025 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG)
assert(Globals.size() > 1)
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
static SDValue LowerADDC_ADDE_SUBC_SUBE ( SDValue  Op,
SelectionDAG DAG 
)
static

Definition at line 16128 of file X86ISelLowering.cpp.

16128  {
16129  EVT VT = Op.getNode()->getSimpleValueType(0);
16130 
16131  // Let legalize expand this if it isn't a legal type yet.
16132  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
16133  return SDValue();
16134 
16135  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
16136 
16137  unsigned Opc;
16138  bool ExtraOp = false;
16139  switch (Op.getOpcode()) {
16140  default: llvm_unreachable("Invalid code");
16141  case ISD::ADDC: Opc = X86ISD::ADD; break;
16142  case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break;
16143  case ISD::SUBC: Opc = X86ISD::SUB; break;
16144  case ISD::SUBE: Opc = X86ISD::SBB; ExtraOp = true; break;
16145  }
16146 
16147  if (!ExtraOp)
16148  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
16149  Op.getOperand(1));
16150  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
16151  Op.getOperand(1), Op.getOperand(2));
16152 }
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
SDVTList getVTList(EVT VT)
SDNode * getNode() const
get the SDNode which holds the desired result
bool isTypeLegal(EVT VT) const
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
MVT getSimpleValueType(unsigned ResNo) const
static SDValue LowerADJUST_TRAMPOLINE ( SDValue  Op,
SelectionDAG DAG 
)
static

Definition at line 14690 of file X86ISelLowering.cpp.

14690  {
14691  return Op.getOperand(0);
14692 }
const SDValue & getOperand(unsigned i) const
static SDValue LowerANY_EXTEND ( SDValue  Op,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Definition at line 11154 of file X86ISelLowering.cpp.

11155  {
11156  if (Subtarget->hasFp256()) {
11157  SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
11158  if (Res.getNode())
11159  return Res;
11160  }
11161 
11162  return SDValue();
11163 }
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasFp256() const
Definition: X86Subtarget.h:320
static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget)
static SDValue LowerAsSplatVectorLoad ( SDValue  SrcOp,
MVT  VT,
SDLoc  dl,
SelectionDAG DAG 
)
static

Definition at line 5561 of file X86ISelLowering.cpp.

5561  {
5562 
5563  // Check if the scalar load can be widened into a vector load. And if
5564  // the address is "base + cst" see if the cst can be "absorbed" into
5565  // the shuffle mask.
5566  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
5567  SDValue Ptr = LD->getBasePtr();
5568  if (!ISD::isNormalLoad(LD) || LD->isVolatile())
5569  return SDValue();
5570  EVT PVT = LD->getValueType(0);
5571  if (PVT != MVT::i32 && PVT != MVT::f32)
5572  return SDValue();
5573 
5574  int FI = -1;
5575  int64_t Offset = 0;
5576  if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
5577  FI = FINode->getIndex();
5578  Offset = 0;
5579  } else if (DAG.isBaseWithConstantOffset(Ptr) &&
5580  isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
5581  FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
5582  Offset = Ptr.getConstantOperandVal(1);
5583  Ptr = Ptr.getOperand(0);
5584  } else {
5585  return SDValue();
5586  }
5587 
5588  // FIXME: 256-bit vector instructions don't require a strict alignment,
5589  // improve this code to support it better.
5590  unsigned RequiredAlign = VT.getSizeInBits()/8;
5591  SDValue Chain = LD->getChain();
5592  // Make sure the stack object alignment is at least 16 or 32.
5594  if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
5595  if (MFI->isFixedObjectIndex(FI)) {
5596  // Can't change the alignment. FIXME: It's possible to compute
5597  // the exact stack offset and reference FI + adjust offset instead.
5598  // If someone *really* cares about this. That's the way to implement it.
5599  return SDValue();
5600  } else {
5601  MFI->setObjectAlignment(FI, RequiredAlign);
5602  }
5603  }
5604 
5605  // (Offset % 16 or 32) must be multiple of 4. Then address is then
5606  // Ptr + (Offset & ~15).
5607  if (Offset < 0)
5608  return SDValue();
5609  if ((Offset % RequiredAlign) & 3)
5610  return SDValue();
5611  int64_t StartOffset = Offset & ~(RequiredAlign-1);
5612  if (StartOffset)
5613  Ptr = DAG.getNode(ISD::ADD, SDLoc(Ptr), Ptr.getValueType(),
5614  Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
5615 
5616  int EltNo = (Offset - StartOffset) >> 2;
5617  unsigned NumElems = VT.getVectorNumElements();
5618 
5619  EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
5620  SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
5621  LD->getPointerInfo().getWithOffset(StartOffset),
5622  false, false, false, 0);
5623 
5624  SmallVector<int, 8> Mask;
5625  for (unsigned i = 0; i != NumElems; ++i)
5626  Mask.push_back(EltNo);
5627 
5628  return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]);
5629  }
5630 
5631  return SDValue();
5632 }
LLVMContext * getContext() const
Definition: SelectionDAG.h:280
unsigned InferPtrAlignment(SDValue Ptr) const
unsigned getSizeInBits() const
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:276
Abstract Stack Frame Information.
bool isFixedObjectIndex(int ObjectIdx) const
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
bool isNormalLoad(const SDNode *N)
unsigned getVectorNumElements() const
const SDValue & getOperand(unsigned i) const
bool isBaseWithConstantOffset(SDValue Op) const
MachineFrameInfo * getFrameInfo()
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo=nullptr, const MDNode *Ranges=nullptr)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
uint64_t getConstantOperandVal(unsigned i) const
EVT getValueType() const
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
void setObjectAlignment(int ObjectIdx, unsigned Align)
setObjectAlignment - Change the alignment of the specified stack object.
static SDValue LowerATOMIC_FENCE ( SDValue  Op,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Definition at line 15959 of file X86ISelLowering.cpp.

15960  {
15961  SDLoc dl(Op);
15962  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
15963  cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
15964  SynchronizationScope FenceScope = static_cast<SynchronizationScope>(
15965  cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
15966 
15967  // The only fence that needs an instruction is a sequentially-consistent
15968  // cross-thread fence.
15969  if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) {
15970  // Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
15971  // no-sse2). There isn't any reason to disable it if the target processor
15972  // supports it.
15973  if (Subtarget->hasSSE2() || Subtarget->is64Bit())
15974  return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
15975 
15976  SDValue Chain = Op.getOperand(0);
15977  SDValue Zero = DAG.getConstant(0, MVT::i32);
15978  SDValue Ops[] = {
15979  DAG.getRegister(X86::ESP, MVT::i32), // Base
15980  DAG.getTargetConstant(1, MVT::i8), // Scale
15981  DAG.getRegister(0, MVT::i32), // Index
15982  DAG.getTargetConstant(0, MVT::i32), // Disp
15983  DAG.getRegister(0, MVT::i32), // Segment.
15984  Zero,
15985  Chain
15986  };
15987  SDNode *Res = DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops);
15988  return SDValue(Res, 0);
15989  }
15990 
15991  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
15992  return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
15993 }
SynchronizationScope
Definition: Instructions.h:48
AtomicOrdering
Definition: Instructions.h:37
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Definition: X86Subtarget.h:283
bool hasSSE2() const
Definition: X86Subtarget.h:312
const SDValue & getOperand(unsigned i) const
SDValue getTargetConstant(uint64_t Val, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:406
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
MachineSDNode * getMachineNode(unsigned Opcode, SDLoc dl, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getRegister(unsigned Reg, EVT VT)
static SDValue LowerATOMIC_STORE ( SDValue  Op,
SelectionDAG DAG 
)
static

Definition at line 16103 of file X86ISelLowering.cpp.

16103  {
16104  SDNode *Node = Op.getNode();
16105  SDLoc dl(Node);
16106  EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
16107 
16108  // Convert seq_cst store -> xchg
16109  // Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b)
16110  // FIXME: On 32-bit, store -> fist or movq would be more efficient
16111  // (The only way to get a 16-byte store is cmpxchg16b)
16112  // FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment.
16113  if (cast<AtomicSDNode>(Node)->getOrdering() == SequentiallyConsistent ||
16114  !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
16115  SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
16116  cast<AtomicSDNode>(Node)->getMemoryVT(),
16117  Node->getOperand(0),
16118  Node->getOperand(1), Node->getOperand(2),
16119  cast<AtomicSDNode>(Node)->getMemOperand(),
16120  cast<AtomicSDNode>(Node)->getOrdering(),
16121  cast<AtomicSDNode>(Node)->getSynchScope());
16122  return Swap.getValue(1);
16123  }
16124  // Other atomic stores have a simple pattern.
16125  return Op;
16126 }
SDValue getValue(unsigned R) const
const SDValue & getOperand(unsigned Num) const
const DomTreeNodeT * Node
SDNode * getNode() const
get the SDNode which holds the desired result
bool isTypeLegal(EVT VT) const
SDValue getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, const Value *PtrVal, unsigned Alignment, AtomicOrdering Ordering, SynchronizationScope SynchScope)
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
static SDValue LowerAVXCONCAT_VECTORS ( SDValue  Op,
SelectionDAG DAG 
)
static

Definition at line 6842 of file X86ISelLowering.cpp.

6842  {
6843  SDLoc dl(Op);
6844  MVT ResVT = Op.getSimpleValueType();
6845 
6846  assert((ResVT.is256BitVector() ||
6847  ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide");
6848 
6849  SDValue V1 = Op.getOperand(0);
6850  SDValue V2 = Op.getOperand(1);
6851  unsigned NumElems = ResVT.getVectorNumElements();
6852  if(ResVT.is256BitVector())
6853  return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
6854 
6855  if (Op.getNumOperands() == 4) {
6856  MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(),
6857  ResVT.getVectorNumElements()/2);
6858  SDValue V3 = Op.getOperand(2);
6859  SDValue V4 = Op.getOperand(3);
6860  return Concat256BitVectors(Concat128BitVectors(V1, V2, HalfVT, NumElems/2, DAG, dl),
6861  Concat128BitVectors(V3, V4, HalfVT, NumElems/2, DAG, dl), ResVT, NumElems, DAG, dl);
6862  }
6863  return Concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
6864 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
unsigned getNumOperands() const
MVT getScalarType() const
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
const SDValue & getOperand(unsigned i) const
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT, unsigned NumElems, SelectionDAG &DAG, SDLoc dl)
static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT, unsigned NumElems, SelectionDAG &DAG, SDLoc dl)
static SDValue LowerAVXExtend ( SDValue  Op,
SelectionDAG DAG,
const X86Subtarget Subtarget 
)
static

Definition at line 11080 of file X86ISelLowering.cpp.

11081  {
11082  MVT VT = Op->getSimpleValueType(0);
11083  SDValue In = Op->getOperand(0);
11084  MVT InVT = In.getSimpleValueType();
11085  SDLoc dl(Op);
11086 
11087  // Optimize vectors in AVX mode:
11088  //
11089  // v8i16 -> v8i32
11090  // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32.
11091  // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
11092  // Concat upper and lower parts.
11093  //
11094  // v4i32 -> v4i64
11095  // Use vpunpckldq for 4 lower elements v4i32 -> v2i64.
11096  // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
11097  // Concat upper and lower parts.
11098  //
11099 
11100  if (((VT != MVT::v16i16) || (InVT != MVT::v16i8)) &&
11101  ((VT != MVT::v8i32) || (InVT != MVT::v8i16)) &&
11102  ((VT != MVT::v4i64) || (InVT != MVT::v4i32)))
11103  return SDValue();
11104 
11105  if (Subtarget->hasInt256())
11106  return DAG.getNode(X86ISD::VZEXT, dl, VT, In);
11107 
11108  SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl);
11109  SDValue Undef = DAG.getUNDEF(InVT);
11110  bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND;
11111  SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
11112  SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
11113 
11114  MVT HVT = MVT::getVectorVT(VT.getVectorElementType(),
11115  VT.getVectorNumElements()/2);
11116 
11117  OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo);
11118  OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi);
11119 
11120  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
11121 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG, SDLoc dl)
const SDValue & getOperand(unsigned Num) const
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, SDValue V2)
getUnpackh - Returns a vector_shuffle node for an unpackh operation.
unsigned getVectorNumElements() const
static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, SDValue V2)
getUnpackl - Returns a vector_shuffle node for an unpackl operation.
unsigned getOpcode() const
bool hasInt256() const
Definition: X86Subtarget.h:321
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:362
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
MVT getVectorElementType() const
MVT getSimpleValueType(unsigned ResNo) const
/// LowerBITCAST - Custom lower ISD::BITCAST for x86.
///
/// Handles the two cast families that are Custom-legalized here:
///  * 64-bit small-element integer vectors (v2i32/v4i16/v8i8) -> f64:
///    widen the input to twice its element count (upper half undef),
///    bitcast the result to v2f64 and extract lane 0.
///  * i64 <-> MMX-sized vector casts, which are simply Legal.
/// Returns SDValue() when the conversion must be expanded instead.
///
/// NOTE(review): the declaration of Elts (original line 16054) was dropped
/// by the doc extractor — restored below so 'Elts' is defined before use.
static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
                            SelectionDAG &DAG) {
  MVT SrcVT = Op.getOperand(0).getSimpleValueType();
  MVT DstVT = Op.getSimpleValueType();

  if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) {
    assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
    if (DstVT != MVT::f64)
      // This conversion needs to be expanded.
      return SDValue();

    SDValue InVec = Op->getOperand(0);
    SDLoc dl(Op);
    unsigned NumElts = SrcVT.getVectorNumElements();
    EVT SVT = SrcVT.getVectorElementType();

    // Widen the vector in input in the case of MVT::v2i32.
    // Example: from MVT::v2i32 to MVT::v4i32.
    SmallVector<SDValue, 16> Elts;
    for (unsigned i = 0, e = NumElts; i != e; ++i)
      Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, InVec,
                                 DAG.getIntPtrConstant(i)));

    // Explicitly mark the extra elements as Undef.
    SDValue Undef = DAG.getUNDEF(SVT);
    for (unsigned i = NumElts, e = NumElts * 2; i != e; ++i)
      Elts.push_back(Undef);

    // Rebuild as a 128-bit vector, view it as v2f64 and take element 0.
    EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
    SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Elts);
    SDValue ToV2F64 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, BV);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, ToV2F64,
                       DAG.getIntPtrConstant(0));
  }

  assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
         Subtarget->hasMMX() && "Unexpected custom BITCAST");
  assert((DstVT == MVT::i64 ||
          (DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
         "Unexpected custom BITCAST");
  // i64 <=> MMX conversions are Legal.
  if (SrcVT==MVT::i64 && DstVT.isVector())
    return Op;
  if (DstVT==MVT::i64 && SrcVT.isVector())
    return Op;
  // MMX <=> MMX conversions are Legal.
  if (SrcVT.isVector() && DstVT.isVector())
    return Op;
  // All other conversions need to be expanded.
  return SDValue();
}
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
LLVMContext * getContext() const
Definition: SelectionDAG.h:280
unsigned getSizeInBits() const
const SDValue & getOperand(unsigned Num) const
bool hasMMX() const
Definition: X86Subtarget.h:310
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Definition: X86Subtarget.h:283
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
bool hasSSE2() const
Definition: X86Subtarget.h:312
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
const SDValue & getOperand(unsigned i) const
bool isVector() const
isVector - Return true if this is a vector value type.
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
MVT getVectorElementType() const
/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
/// Packs each pair of adjacent i8 operands into one i16 lane (even element
/// zero-extended, odd element shifted left by 8 and OR'd in), inserts the
/// packed lanes into a v8i16, and bitcasts the result back to v16i8.
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
                                     unsigned NumNonZero, unsigned NumZero,
                                     SelectionDAG &DAG,
                                     const X86Subtarget *Subtarget,
                                     const TargetLowering &TLI) {
  // More than 8 non-zero lanes: this lowering is not profitable.
  if (NumNonZero > 8)
    return SDValue();

  SDLoc dl(Op);
  SDValue Result;
  bool NeedInit = true;
  for (unsigned Idx = 0; Idx < 16; ++Idx) {
    const bool CurIsNonZero = (NonZeros & (1 << Idx)) != 0;
    if (CurIsNonZero && NeedInit) {
      // Base vector: all-zeros when some lanes must be zero, else undef.
      Result = NumZero ? getZeroVector(MVT::v8i16, Subtarget, DAG, dl)
                       : DAG.getUNDEF(MVT::v8i16);
      NeedInit = false;
    }

    // Act only on odd indices: each (even, odd) byte pair forms one i16.
    if ((Idx & 1) == 0)
      continue;

    SDValue HighElt, LowElt;
    const bool PrevIsNonZero = (NonZeros & (1 << (Idx - 1))) != 0;
    if (PrevIsNonZero)
      LowElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16,
                           Op.getOperand(Idx - 1));
    if (CurIsNonZero) {
      HighElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16,
                            Op.getOperand(Idx));
      HighElt = DAG.getNode(ISD::SHL, dl, MVT::i16,
                            HighElt, DAG.getConstant(8, MVT::i8));
      if (PrevIsNonZero)
        HighElt = DAG.getNode(ISD::OR, dl, MVT::i16, HighElt, LowElt);
    } else {
      HighElt = LowElt;
    }

    // Insert the packed pair (may be null if both halves were zero/undef).
    if (HighElt.getNode())
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, Result,
                           HighElt, DAG.getIntPtrConstant(Idx / 2));
  }

  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Result);
}
static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG, SDLoc dl)
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
const SDValue & getOperand(unsigned i) const
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:362
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static SDValue LowerBuildVectorv4x32 ( SDValue  Op,
unsigned  NumElems,
unsigned  NonZeros,
unsigned  NumNonZero,
unsigned  NumZero,
SelectionDAG DAG,
const X86Subtarget Subtarget,
const TargetLowering TLI 
)
static

LowerBuildVectorv4x32 - Custom lower build_vector of v4i32 or v4f32.

Definition at line 5473 of file X86ISelLowering.cpp.

5477  {
      // Try to match this build_vector as a single X86ISD::INSERTPS: all
      // non-zero lanes must be EXTRACT_VECTOR_ELTs from one 128-bit source
      // V, and at most one lane may land at a different index than the one
      // it was extracted from.
5478  // We know there's at least one non-zero element
5479  unsigned FirstNonZeroIdx = 0;
5480  SDValue FirstNonZero = Op->getOperand(FirstNonZeroIdx);
      // Skip leading undef/zero operands to find the first meaningful lane.
5481  while (FirstNonZero.getOpcode() == ISD::UNDEF ||
5482  X86::isZeroNode(FirstNonZero)) {
5483  ++FirstNonZeroIdx;
5484  FirstNonZero = Op->getOperand(FirstNonZeroIdx);
5485  }
5486 
      // Only lanes extracted at a constant index can be matched.
5487  if (FirstNonZero.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
5488  !isa<ConstantSDNode>(FirstNonZero.getOperand(1)))
5489  return SDValue();
5490 
      // insertps requires SSE4.1 and a 4-lane 32-bit element source.
5491  SDValue V = FirstNonZero.getOperand(0);
5492  MVT VVT = V.getSimpleValueType();
5493  if (!Subtarget->hasSSE41() || (VVT != MVT::v4f32 && VVT != MVT::v4i32))
5494  return SDValue();
5495 
5496  unsigned FirstNonZeroDst =
5497  cast<ConstantSDNode>(FirstNonZero.getOperand(1))->getZExtValue();
      // CorrectIdx counts lanes whose source index equals their destination
      // index; note it is seeded from a bool comparison (0 or 1).
5498  unsigned CorrectIdx = FirstNonZeroDst == FirstNonZeroIdx;
5499  unsigned IncorrectIdx = CorrectIdx ? -1U : FirstNonZeroIdx;
5500  unsigned IncorrectDst = CorrectIdx ? -1U : FirstNonZeroDst;
5501 
      // Scan the remaining lanes, tracking at most ONE misplaced element —
      // a single insertps can move only one lane.
5502  for (unsigned Idx = FirstNonZeroIdx + 1; Idx < NumElems; ++Idx) {
5503  SDValue Elem = Op.getOperand(Idx);
5504  if (Elem.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elem))
5505  continue;
5506 
5507  // TODO: What else can be here? Deal with it.
5508  if (Elem.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
5509  return SDValue();
5510 
5511  // TODO: Some optimizations are still possible here
5512  // ex: Getting one element from a vector, and the rest from another.
5513  if (Elem.getOperand(0) != V)
5514  return SDValue();
5515 
5516  unsigned Dst = cast<ConstantSDNode>(Elem.getOperand(1))->getZExtValue();
5517  if (Dst == Idx)
5518  ++CorrectIdx;
5519  else if (IncorrectIdx == -1U) {
5520  IncorrectIdx = Idx;
5521  IncorrectDst = Dst;
5522  } else
5523  // There was already one element with an incorrect index.
5524  // We can't optimize this case to an insertps.
5525  return SDValue();
5526  }
5527 
      // Emit the insertps when every non-zero lane is in place, or exactly
      // one needs moving. The immediate encodes the source lane in bits
      // 7:6, the destination lane in bits 5:4, and a zero-mask of the
      // all-zero/undef lanes in bits 3:0.
5528  if (NumNonZero == CorrectIdx || NumNonZero == CorrectIdx + 1) {
5529  SDLoc dl(Op);
5530  EVT VT = Op.getSimpleValueType();
5531  unsigned ElementMoveMask = 0;
5532  if (IncorrectIdx == -1U)
5533  ElementMoveMask = FirstNonZeroIdx << 6 | FirstNonZeroIdx << 4;
5534  else
5535  ElementMoveMask = IncorrectDst << 6 | IncorrectIdx << 4;
5536 
5537  SDValue InsertpsMask =
5538  DAG.getIntPtrConstant(ElementMoveMask | (~NonZeros & 0xf));
5539  return DAG.getNode(X86ISD::INSERTPS, dl, VT, V, V, InsertpsMask);
5540  }
5541 
5542  return SDValue();
5543 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
bool hasSSE41() const
Definition: X86Subtarget.h:315
const SDValue & getOperand(unsigned Num) const
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
bool isZeroNode(SDValue Elt)
/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
///
/// Starts from a zero (or undef) v8i16 base and inserts each non-zero
/// operand at its lane with INSERT_VECTOR_ELT (pinsrw).
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
                                     unsigned NumNonZero, unsigned NumZero,
                                     SelectionDAG &DAG,
                                     const X86Subtarget *Subtarget,
                                     const TargetLowering &TLI) {
  // More than 4 non-zero lanes: this lowering is not profitable.
  if (NumNonZero > 4)
    return SDValue();

  SDLoc dl(Op);
  SDValue Result;
  bool NeedInit = true;
  for (unsigned Lane = 0; Lane < 8; ++Lane) {
    if ((NonZeros & (1 << Lane)) == 0)
      continue;
    if (NeedInit) {
      // Base vector: all-zeros when some lanes must be zero, else undef.
      Result = NumZero ? getZeroVector(MVT::v8i16, Subtarget, DAG, dl)
                       : DAG.getUNDEF(MVT::v8i16);
      NeedInit = false;
    }
    Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, Result,
                         Op.getOperand(Lane), DAG.getIntPtrConstant(Lane));
  }

  // Null if every lane was zero/undef — same as the original behavior.
  return Result;
}
static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG, SDLoc dl)
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
const SDValue & getOperand(unsigned i) const
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
/// LowerCMP_SWAP - Lower an atomic compare-and-swap to X86's LCMPXCHG
/// pseudo: the expected value is glued into the width-appropriate
/// accumulator register (AL/AX/EAX/RAX), and the old value, a SETCC success
/// flag derived from ZF, and the chain are read back out and RAUW'd over
/// the original node's results.
///
/// NOTE(review): three statements (original lines 16020, 16027, 16031 —
/// the LCMPXCHG node creation, the Success SETCC, and its
/// ReplaceAllUsesOfValueWith) were dropped by the doc extractor and are
/// restored below; the page's '#define Success' cross-reference corroborates
/// the Success value.
static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
                             SelectionDAG &DAG) {
  MVT T = Op.getSimpleValueType();
  SDLoc DL(Op);
  unsigned Reg = 0;
  unsigned size = 0;
  switch(T.SimpleTy) {
  default: llvm_unreachable("Invalid value type!");
  case MVT::i8:  Reg = X86::AL;  size = 1; break;
  case MVT::i16: Reg = X86::AX;  size = 2; break;
  case MVT::i32: Reg = X86::EAX; size = 4; break;
  case MVT::i64:
    assert(Subtarget->is64Bit() && "Node not type legal!");
    Reg = X86::RAX; size = 8;
    break;
  }
  // Copy the expected value (operand 2) into the accumulator register.
  SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
                                  Op.getOperand(2), SDValue());
  SDValue Ops[] = { cpIn.getValue(0),
                    Op.getOperand(1),
                    Op.getOperand(3),
                    DAG.getTargetConstant(size, MVT::i8),
                    cpIn.getValue(1) };
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
  SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
                                           Ops, T, MMO);

  // Read the old value back out of the accumulator, then EFLAGS.
  SDValue cpOut =
    DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
  SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS,
                                      MVT::i32, cpOut.getValue(2));
  // Success <=> ZF set by cmpxchg (the exchange happened).
  SDValue Success = DAG.getNode(X86ISD::SETCC, DL, Op->getValueType(1),
                                DAG.getConstant(X86::COND_E, MVT::i8), EFLAGS);

  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), cpOut);
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), EFLAGS.getValue(1));
  return SDValue();
}
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
SDValue getValue(unsigned R) const
SDValue getCopyToReg(SDValue Chain, SDLoc dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:486
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
EVT getValueType(unsigned ResNo) const
Reg
All possible values of the reg field in the ModR/M byte.
SimpleValueType SimpleTy
SDVTList getVTList(EVT VT)
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
#define T
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Definition: X86Subtarget.h:283
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
SDValue getCopyFromReg(SDValue Chain, SDLoc dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:511
SDValue getTargetConstant(uint64_t Val, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:406
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
#define Success
SDValue getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, bool Vol=false, bool ReadMem=true, bool WriteMem=true)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
/// LowerCONCAT_VECTORS - Lower a CONCAT_VECTORS node by delegating to the
/// AVX-specific lowering (vinsertf128 and friends).
///
/// NOTE(review): the declaration of VT (original line 6867) was dropped by
/// the doc extractor and is restored here; the page's LLVM_ATTRIBUTE_UNUSED
/// cross-reference indicates VT is consumed only by the assertion below.
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
  MVT LLVM_ATTRIBUTE_UNUSED VT = Op.getSimpleValueType();
  assert((VT.is256BitVector() && Op.getNumOperands() == 2) ||
         (VT.is512BitVector() && (Op.getNumOperands() == 2 ||
                                  Op.getNumOperands() == 4)));

  // AVX can use the vinsertf128 instruction to create 256-bit vectors
  // from two other 128-bit ones.

  // 512-bit vector may contain 2 256-bit vectors or 4 128-bit vectors
  return LowerAVXCONCAT_VECTORS(Op, DAG);
}
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getNumOperands() const
assert(Globals.size() > 1)
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:161
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG)
/// LowerCTLZ - Lower ISD::CTLZ using the BSR instruction.
///
/// BSR yields the index of the highest set bit, so ctlz(x) is
/// (Width-1) ^ bsr(x); a CMOV on the ZF result substitutes 2*Width-1 for
/// the zero-input case so the final xor produces Width. i8 is widened to
/// i32 first since there is no 8-bit bsr.
static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  EVT WideVT = VT;
  unsigned Width = VT.getSizeInBits();
  SDLoc dl(Op);

  SDValue Src = Op.getOperand(0);
  if (VT == MVT::i8) {
    // Zero extend to i32 since there is not an i8 bsr.
    WideVT = MVT::i32;
    Src = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Src);
  }

  // Issue a bsr (scan bits in reverse); the second result is EFLAGS.
  SDVTList VTs = DAG.getVTList(WideVT, MVT::i32);
  SDValue BSR = DAG.getNode(X86ISD::BSR, dl, VTs, Src);

  // If src is zero (i.e. bsr sets ZF), select 2*Width-1 so that the xor
  // below yields Width.
  SDValue CMovOps[] = {
    BSR,
    DAG.getConstant(Width + Width - 1, WideVT),
    DAG.getConstant(X86::COND_E, MVT::i8),
    BSR.getValue(1)
  };
  SDValue Res = DAG.getNode(X86ISD::CMOV, dl, WideVT, CMovOps);

  // Finally xor with Width-1 to turn the bit index into a leading-zero count.
  Res = DAG.getNode(ISD::XOR, dl, WideVT, Res,
                    DAG.getConstant(Width - 1, WideVT));

  if (VT == MVT::i8)
    Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Res);
  return Res;
}
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
SDValue getValue(unsigned R) const
unsigned getSizeInBits() const
SDVTList getVTList(EVT VT)
const SDValue & getOperand(unsigned i) const
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:362
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:368
/// LowerCTLZ_ZERO_UNDEF - Lower ISD::CTLZ_ZERO_UNDEF using BSR.
///
/// Identical to LowerCTLZ except that a zero input is undefined behavior,
/// so no CMOV guard against the ZF result is needed.
static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  EVT WideVT = VT;
  unsigned Width = VT.getSizeInBits();
  SDLoc dl(Op);

  SDValue Src = Op.getOperand(0);
  if (VT == MVT::i8) {
    // Zero extend to i32 since there is not an i8 bsr.
    WideVT = MVT::i32;
    Src = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Src);
  }

  // Issue a bsr (scan bits in reverse).
  SDVTList VTs = DAG.getVTList(WideVT, MVT::i32);
  SDValue Res = DAG.getNode(X86ISD::BSR, dl, VTs, Src);

  // Xor with Width-1 to turn the bit index into a leading-zero count.
  Res = DAG.getNode(ISD::XOR, dl, WideVT, Res,
                    DAG.getConstant(Width - 1, WideVT));

  if (VT == MVT::i8)
    Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Res);
  return Res;
}
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getSizeInBits() const
SDVTList getVTList(EVT VT)
const SDValue & getOperand(unsigned i) const
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:362
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:368
/// LowerCTTZ - Lower ISD::CTTZ using the BSF instruction.
///
/// BSF yields the index of the lowest set bit, which is exactly cttz; a
/// CMOV on the ZF result substitutes the bit width for a zero input.
static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  unsigned Width = VT.getSizeInBits();
  SDLoc dl(Op);
  SDValue Src = Op.getOperand(0);

  // Issue a bsf (scan bits forward); the second result is EFLAGS.
  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
  SDValue BSF = DAG.getNode(X86ISD::BSF, dl, VTs, Src);

  // If src is zero (i.e. bsf sets ZF), return the bit width instead.
  SDValue CMovOps[] = {
    BSF,
    DAG.getConstant(Width, VT),
    DAG.getConstant(X86::COND_E, MVT::i8),
    BSF.getValue(1)
  };
  return DAG.getNode(X86ISD::CMOV, dl, VT, CMovOps);
}
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
SDValue getValue(unsigned R) const
unsigned getSizeInBits() const
SDVTList getVTList(EVT VT)
const SDValue & getOperand(unsigned i) const
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
/// LowerEXTRACT_SUBVECTOR - Lower EXTRACT_SUBVECTOR on AVX-capable targets
/// via vextractf128-style subvector extraction: a 128-bit result from a
/// 256/512-bit source, or a 256-bit result from a 512-bit source.
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
                                      SelectionDAG &DAG) {
  SDLoc dl(Op);
  SDValue Src = Op.getOperand(0);
  SDValue Idx = Op.getOperand(1);
  unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
  MVT ResVT = Op.getSimpleValueType();
  MVT SrcVT = Src.getSimpleValueType();

  // Subvector extraction here relies on AVX (vextractf128 / vextractf32x4).
  if (!Subtarget->hasFp256())
    return SDValue();

  if (isa<ConstantSDNode>(Idx)) {
    if (ResVT.is128BitVector() &&
        (SrcVT.is256BitVector() || SrcVT.is512BitVector()))
      return Extract128BitVector(Src, IdxVal, DAG, dl);
    if (ResVT.is256BitVector() && SrcVT.is512BitVector())
      return Extract256BitVector(Src, IdxVal, DAG, dl);
  }
  return SDValue();
}
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
Generate a DAG to grab 256-bits from a 512-bit vector.
const SDValue & getOperand(unsigned i) const
bool hasFp256() const
Definition: X86Subtarget.h:320
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
static SDValue LowerEXTRACT_VECTOR_ELT_SSE4 ( SDValue  Op,
SelectionDAG DAG 
)
static

Definition at line 9708 of file X86ISelLowering.cpp.

9708  {
9709  MVT VT = Op.getSimpleValueType();
9710  SDLoc dl(Op);
9711 
9713  return SDValue();
9714 
9715  if (VT.getSizeInBits() == 8) {
9716  SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32,
9717  Op.getOperand(0), Op.getOperand(1));
9718  SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
9719  DAG.getValueType(VT));
9720  return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
9721  }
9722 
9723  if (VT.getSizeInBits() == 16) {
9724  unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
9725  // If Idx is 0, it's cheaper to do a move instead of a pextrw.
9726  if (Idx == 0)
9727  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
9728  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
9729  DAG.getNode(ISD::BITCAST, dl,
9730  MVT::v4i32,
9731  Op.getOperand(0)),
9732  Op.getOperand(1)));
9733  SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32,
9734  Op.getOperand(0), Op.getOperand(1));
9735  SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
9736  DAG.getValueType(VT));
9737  return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
9738  }
9739 
9740  if (VT == MVT::f32) {
9741  // EXTRACTPS outputs to a GPR32 register which will require a movd to copy
9742  // the result back to FR32 register. It's only worth matching if the
9743  // result has a single use which is a store or a bitcast to i32. And in
9744  // the case of a store, it's not worth it if the index is a constant 0,
9745  // because a MOVSSmr can be used instead, which is smaller and faster.
9746  if (!Op.hasOneUse())
9747  return SDValue();
9748  SDNode *User = *Op.getNode()->use_begin();
9749  if ((User->getOpcode() != ISD::STORE ||
9750  (isa<ConstantSDNode>(Op.getOperand(1)) &&
9751  cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
9752  (User->getOpcode() != ISD::BITCAST ||
9753  User->getValueType(0) != MVT::i32))
9754  return SDValue();
9755  SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
9756  DAG.getNode(ISD::BITCAST, dl, MVT::v4i32,
9757  Op.getOperand(0)),
9758  Op.getOperand(1));
9759  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract);
9760  }
9761 
9762  if (VT == MVT::i32 || VT == MVT::i64) {
9763  // ExtractPS/pextrq works with constant index.
9764  if (isa<ConstantSDNode>(Op.getOperand(1)))
9765  return Op;
9766  }
9767  return SDValue();
9768 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
bool hasOneUse() const
unsigned getOpcode() const
unsigned getSizeInBits() const
EVT getValueType(unsigned ResNo) const
SDNode * getNode() const
get the SDNode which holds the desired result
const SDValue & getOperand(unsigned i) const
Extract files back to file system.
Definition: llvm-ar.cpp:110
use_iterator use_begin() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getValueType(EVT)
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:368
#define Assert(C, M)
Definition: Lint.cpp:161
/// LowerFABS - Lower ISD::FABS by AND'ing the value with a constant-pool
/// mask that clears only the sign bit (splatted per element for vectors).
static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) {
  LLVMContext *Context = DAG.getContext();
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();
  MVT EltVT = VT;
  unsigned NumElts = VT == MVT::f64 ? 2 : 4;
  if (VT.isVector()) {
    EltVT = VT.getVectorElementType();
    NumElts = VT.getVectorNumElements();
  }

  // All-ones constant with the sign bit cleared, splatted across NumElts.
  Constant *MaskCst;
  if (EltVT == MVT::f64)
    MaskCst = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
                                                APInt(64, ~(1ULL << 63))));
  else
    MaskCst = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
                                                APInt(32, ~(1U << 31))));
  MaskCst = ConstantVector::getSplat(NumElts, MaskCst);

  // Materialize the mask via a constant-pool load.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue CPIdx = DAG.getConstantPool(MaskCst, TLI.getPointerTy());
  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
  SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
                             MachinePointerInfo::getConstantPool(),
                             false, false, false, Alignment);

  if (VT.isVector()) {
    // Vector case: perform the AND in the integer domain, then cast back.
    MVT ANDVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
    SDValue IntVal = DAG.getNode(ISD::BITCAST, dl, ANDVT, Op.getOperand(0));
    SDValue IntMask = DAG.getNode(ISD::BITCAST, dl, ANDVT, Mask);
    SDValue Cleared = DAG.getNode(ISD::AND, dl, ANDVT, IntVal, IntMask);
    return DAG.getNode(ISD::BITCAST, dl, VT, Cleared);
  }
  // Scalar case: x86-specific FP AND.
  return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
}
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
LLVMContext * getContext() const
Definition: SelectionDAG.h:280
SDValue getConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offs=0, bool isT=false, unsigned char TargetFlags=0)
virtual MVT getPointerTy(uint32_t=0) const
unsigned getVectorNumElements() const
const SDValue & getOperand(unsigned i) const
LLVM Constant Representation.
Definition: Constant.h:41
bool isVector() const
isVector - Return true if this is a vector value type.
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
Class for arbitrary precision integers.
Definition: APInt.h:75
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo=nullptr, const MDNode *Ranges=nullptr)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
MVT getVectorElementType() const
SDValue getEntryNode() const
Definition: SelectionDAG.h:327
static SDValue LowerFCOPYSIGN ( SDValue  Op,
SelectionDAG DAG 
)
static

Definition at line 11449 of file X86ISelLowering.cpp.

11449  {
11450  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11451  LLVMContext *Context = DAG.getContext();
11452  SDValue Op0 = Op.getOperand(0);
11453  SDValue Op1 = Op.getOperand(1);
11454  SDLoc dl(Op);
11455  MVT VT = Op.getSimpleValueType();
11456  MVT SrcVT = Op1.getSimpleValueType();
11457 
11458  // If second operand is smaller, extend it first.
11459  if (SrcVT.bitsLT(VT)) {
11460  Op1 = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op1);
11461  SrcVT = VT;
11462  }
11463  // And if it is bigger, shrink it first.
11464  if (SrcVT.bitsGT(VT)) {
11465  Op1 = DAG.getNode(ISD::FP_ROUND, dl, VT, Op1, DAG.getIntPtrConstant(1));
11466  SrcVT = VT;
11467  }
11468 
11469  // At this point the operands and the result should have the same
11470  // type, and that won't be f80 since that is not custom lowered.
11471 
11472  // First get the sign bit of second operand.
11474  if (SrcVT == MVT::f64) {
11475  const fltSemantics &Sem = APFloat::IEEEdouble;
11476  CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 1ULL << 63))));
11477  CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0))));
11478  } else {
11479  const fltSemantics &Sem = APFloat::IEEEsingle;
11480  CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 1U << 31))));
11481  CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
11482  CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
11483  CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
11484  }
11485  Constant *C = ConstantVector::get(CV);
11486  SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(), 16);
11487  SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
11488  MachinePointerInfo::getConstantPool(),
11489  false, false, false, 16);
11490  SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
11491 
11492  // Shift sign bit right or left if the two operands have different types.
11493  if (SrcVT.bitsGT(VT)) {
11494  // Op0 is MVT::f32, Op1 is MVT::f64.
11495  SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, SignBit);
11496  SignBit = DAG.getNode(X86ISD::FSRL, dl, MVT::v2f64, SignBit,
11497  DAG.getConstant(32, MVT::i32));
11498  SignBit = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, SignBit);
11499  SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, SignBit,
11500  DAG.getIntPtrConstant(0));
11501  }
11502 
11503  // Clear first operand sign bit.
11504  CV.clear();
11505  if (VT == MVT::f64) {
11506  const fltSemantics &Sem = APFloat::IEEEdouble;
11507  CV.push_back(ConstantFP::get(*Context, APFloat(Sem,
11508  APInt(64, ~(1ULL << 63)))));
11509  CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0))));
11510  } else {
11511  const fltSemantics &Sem = APFloat::IEEEsingle;
11512  CV.push_back(ConstantFP::get(*Context, APFloat(Sem,
11513  APInt(32, ~(1U << 31)))));
11514  CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
11515  CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
11516  CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
11517  }
11518  C = ConstantVector::get(CV);
11519  CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(), 16);
11520  SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
11521  MachinePointerInfo::getConstantPool(),
11522  false, false, false, 16);
11523  SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2);
11524 
11525  // Or the value with the sign bit.
11526  return DAG.getNode(X86ISD::FOR, dl, VT, Val, SignBit);
11527 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
LLVMContext * getContext() const
Definition: SelectionDAG.h:280
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
SDValue getConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offs=0, bool isT=false, unsigned char TargetFlags=0)
virtual MVT getPointerTy(uint32_t=0) const
const SDValue & getOperand(unsigned i) const
LLVM Constant Representation.
Definition: Constant.h:41
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:447
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
Class for arbitrary precision integers.
Definition: APInt.h:75
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo=nullptr, const MDNode *Ranges=nullptr)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
SDValue getEntryNode() const
Definition: SelectionDAG.h:327
/// LowerFGETSIGN - Lower ISD::FGETSIGN as (AND (X86ISD::FGETSIGNx86 x), 1).
static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
  SDValue Src = Op.getOperand(0);
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();

  // Extract the sign with the x86-specific node, then mask to bit 0.
  SDValue One = DAG.getConstant(1, VT);
  SDValue Sign = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, Src, One);
  return DAG.getNode(ISD::AND, dl, VT, Sign, DAG.getConstant(1, VT));
}
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
/// LowerFNEG - Lower ISD::FNEG by XOR'ing the value with a constant-pool
/// mask that has only the sign bit set (splatted per element for vectors).
static SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) {
  LLVMContext *Context = DAG.getContext();
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();
  MVT EltVT = VT;
  unsigned NumElts = VT == MVT::f64 ? 2 : 4;
  if (VT.isVector()) {
    EltVT = VT.getVectorElementType();
    NumElts = VT.getVectorNumElements();
  }

  // Sign-bit-only constant, splatted across NumElts.
  Constant *SignCst;
  if (EltVT == MVT::f64)
    SignCst = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
                                                APInt(64, 1ULL << 63)));
  else
    SignCst = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
                                                APInt(32, 1U << 31)));
  SignCst = ConstantVector::getSplat(NumElts, SignCst);

  // Materialize the mask via a constant-pool load.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue CPIdx = DAG.getConstantPool(SignCst, TLI.getPointerTy());
  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
  SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
                             MachinePointerInfo::getConstantPool(),
                             false, false, false, Alignment);

  if (VT.isVector()) {
    // Vector case: XOR in the integer domain, then cast back.
    MVT XORVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits()/64);
    SDValue IntVal = DAG.getNode(ISD::BITCAST, dl, XORVT, Op.getOperand(0));
    SDValue IntMask = DAG.getNode(ISD::BITCAST, dl, XORVT, Mask);
    SDValue Flipped = DAG.getNode(ISD::XOR, dl, XORVT, IntVal, IntMask);
    return DAG.getNode(ISD::BITCAST, dl, VT, Flipped);
  }

  // Scalar case: x86-specific FP XOR.
  return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
}
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
LLVMContext * getContext() const
Definition: SelectionDAG.h:280
unsigned getSizeInBits() const
SDValue getConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offs=0, bool isT=false, unsigned char TargetFlags=0)
virtual MVT getPointerTy(uint32_t=0) const
unsigned getVectorNumElements() const
const SDValue & getOperand(unsigned i) const
LLVM Constant Representation.
Definition: Constant.h:41
bool isVector() const
isVector - Return true if this is a vector value type.
Class for arbitrary precision integers.
Definition: APInt.h:75
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo=nullptr, const MDNode *Ranges=nullptr)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
MVT getVectorElementType() const
SDValue getEntryNode() const
Definition: SelectionDAG.h:327
static SDValue LowerFP_EXTEND ( SDValue  Op,
SelectionDAG DAG 
)
static

Definition at line 11365 of file X86ISelLowering.cpp.

11365  {
11366  SDLoc DL(Op);
11367  MVT VT = Op.getSimpleValueType();
11368  SDValue In = Op.getOperand(0);
11369  MVT SVT = In.getSimpleValueType();
11370 
11371  assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
11372 
11373  return DAG.getNode(X86ISD::VFPEXT, DL, VT,
11374  DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
11375  In, DAG.getUNDEF(SVT)));
11376 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static SDValue LowerFSINCOS ( SDValue  Op,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Definition at line 16154 of file X86ISelLowering.cpp.

16155  {
16156  assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit());
16157 
16158  // For MacOSX, we want to call an alternative entry point: __sincos_stret,
16159  // which returns the values as { float, float } (in XMM0) or
16160  // { double, double } (which is returned in XMM0, XMM1).
16161  SDLoc dl(Op);
16162  SDValue Arg = Op.getOperand(0);
16163  EVT ArgVT = Arg.getValueType();
16164  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
16165 
16168 
16169  Entry.Node = Arg;
16170  Entry.Ty = ArgTy;
16171  Entry.isSExt = false;
16172  Entry.isZExt = false;
16173  Args.push_back(Entry);
16174 
16175  bool isF64 = ArgVT == MVT::f64;
16176  // Only optimize x86_64 for now. i386 is a bit messy. For f32,
16177  // the small struct {f32, f32} is returned in (eax, edx). For f64,
16178  // the results are returned via SRet in memory.
16179  const char *LibcallName = isF64 ? "__sincos_stret" : "__sincosf_stret";
16180  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16181  SDValue Callee = DAG.getExternalSymbol(LibcallName, TLI.getPointerTy());
16182 
16183  Type *RetTy = isF64
16184  ? (Type*)StructType::get(ArgTy, ArgTy, NULL)
16185  : (Type*)VectorType::get(ArgTy, 4);
16186 
16188  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
16189  .setCallee(CallingConv::C, RetTy, Callee, std::move(Args), 0);
16190 
16191  std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
16192 
16193  if (isF64)
16194  // Returned in xmm0 and xmm1.
16195  return CallResult.first;
16196 
16197  // Returned in bits 0:31 and 32:64 xmm0.
16198  SDValue SinVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
16199  CallResult.first, DAG.getIntPtrConstant(0));
16200  SDValue CosVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
16201  CallResult.first, DAG.getIntPtrConstant(1));
16202  SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
16203  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal);
16204 }
LLVMContext * getContext() const
Definition: SelectionDAG.h:280
Type * getTypeForEVT(LLVMContext &Context) const
Definition: ValueTypes.cpp:180
SDValue getExternalSymbol(const char *Sym, EVT VT)
bool isTargetDarwin() const
Definition: X86Subtarget.h:370
SDVTList getVTList(EVT VT)
virtual MVT getPointerTy(uint32_t=0) const
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Definition: X86Subtarget.h:283
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
std::vector< ArgListEntry > ArgListTy
const char * Args[]
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
EVT getValueType() const
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
SDValue getEntryNode() const
Definition: SelectionDAG.h:327
static SDValue LowerINSERT_SUBVECTOR ( SDValue  Op,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Definition at line 10113 of file X86ISelLowering.cpp.

10114  {
10115  if (Subtarget->hasFp256()) {
10116  SDLoc dl(Op.getNode());
10117  SDValue Vec = Op.getNode()->getOperand(0);
10118  SDValue SubVec = Op.getNode()->getOperand(1);
10119  SDValue Idx = Op.getNode()->getOperand(2);
10120 
10121  if ((Op.getNode()->getSimpleValueType(0).is256BitVector() ||
10123  SubVec.getNode()->getSimpleValueType(0).is128BitVector() &&
10124  isa<ConstantSDNode>(Idx)) {
10125  unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
10126  return Insert128BitVector(Vec, SubVec, IdxVal, DAG, dl);
10127  }
10128 
10129  if (Op.getNode()->getSimpleValueType(0).is512BitVector() &&
10130  SubVec.getNode()->getSimpleValueType(0).is256BitVector() &&
10131  isa<ConstantSDNode>(Idx)) {
10132  unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
10133  return Insert256BitVector(Vec, SubVec, IdxVal, DAG, dl);
10134  }
10135  }
10136  return SDValue();
10137 }
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
const SDValue & getOperand(unsigned Num) const
static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasFp256() const
Definition: X86Subtarget.h:320
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
MVT getSimpleValueType(unsigned ResNo) const
static SDValue LowerINSERT_VECTOR_ELT_SSE4 ( SDValue  Op,
SelectionDAG DAG 
)
static

Definition at line 9917 of file X86ISelLowering.cpp.

9917  {
9918  MVT VT = Op.getSimpleValueType();
9919  MVT EltVT = VT.getVectorElementType();
9920  SDLoc dl(Op);
9921 
9922  SDValue N0 = Op.getOperand(0);
9923  SDValue N1 = Op.getOperand(1);
9924  SDValue N2 = Op.getOperand(2);
9925 
9926  if (!VT.is128BitVector())
9927  return SDValue();
9928 
9929  if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
9930  isa<ConstantSDNode>(N2)) {
9931  unsigned Opc;
9932  if (VT == MVT::v8i16)
9933  Opc = X86ISD::PINSRW;
9934  else if (VT == MVT::v16i8)
9935  Opc = X86ISD::PINSRB;
9936  else
9937  Opc = X86ISD::PINSRB;
9938 
 9939  // Transform it so it matches pinsr{b,w} which expects a GR32 as its second
9940  // argument.
9941  if (N1.getValueType() != MVT::i32)
9942  N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
9943  if (N2.getValueType() != MVT::i32)
9944  N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
9945  return DAG.getNode(Opc, dl, VT, N0, N1, N2);
9946  }
9947 
9948  if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
9949  // Bits [7:6] of the constant are the source select. This will always be
9950  // zero here. The DAG Combiner may combine an extract_elt index into these
9951  // bits. For example (insert (extract, 3), 2) could be matched by putting
9952  // the '3' into bits [7:6] of X86ISD::INSERTPS.
9953  // Bits [5:4] of the constant are the destination select. This is the
9954  // value of the incoming immediate.
9955  // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
9956  // combine either bitwise AND or insert of float 0.0 to set these bits.
9957  N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
 9958  // Create this as a scalar-to-vector.
9959  N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
9960  return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
9961  }
9962 
9963  if ((EltVT == MVT::i32 || EltVT == MVT::i64) && isa<ConstantSDNode>(N2)) {
9964  // PINSR* works with constant index.
9965  return Op;
9966  }
9967  return SDValue();
9968 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getSizeInBits() const
const SDValue & getOperand(unsigned i) const
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:365
EVT getValueType() const
MVT getVectorElementType() const
static SDValue LowerINTRINSIC_W_CHAIN ( SDValue  Op,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Definition at line 14485 of file X86ISelLowering.cpp.

14486  {
14487  InitIntinsicsMap();
14488  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
14489  std::map < unsigned, IntrinsicData>::const_iterator itr = IntrMap.find(IntNo);
14490  if (itr == IntrMap.end())
14491  return SDValue();
14492 
14493  SDLoc dl(Op);
14494  IntrinsicData Intr = itr->second;
14495  switch(Intr.Type) {
14496  case RDSEED:
14497  case RDRAND: {
14498  // Emit the node with the right value type.
14499  SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Glue, MVT::Other);
14500  SDValue Result = DAG.getNode(Intr.Opc0, dl, VTs, Op.getOperand(0));
14501 
14502  // If the value returned by RDRAND/RDSEED was valid (CF=1), return 1.
 14503  // Otherwise return the value from Rand, which is always 0, cast to i32.
14504  SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)),
14505  DAG.getConstant(1, Op->getValueType(1)),
14506  DAG.getConstant(X86::COND_B, MVT::i32),
14507  SDValue(Result.getNode(), 1) };
14508  SDValue isValid = DAG.getNode(X86ISD::CMOV, dl,
14509  DAG.getVTList(Op->getValueType(1), MVT::Glue),
14510  Ops);
14511 
14512  // Return { result, isValid, chain }.
14513  return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
14514  SDValue(Result.getNode(), 2));
14515  }
14516  case GATHER: {
14517  //gather(v1, mask, index, base, scale);
14518  SDValue Chain = Op.getOperand(0);
14519  SDValue Src = Op.getOperand(2);
14520  SDValue Base = Op.getOperand(3);
14521  SDValue Index = Op.getOperand(4);
14522  SDValue Mask = Op.getOperand(5);
14523  SDValue Scale = Op.getOperand(6);
14524  return getGatherNode(Intr.Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain,
14525  Subtarget);
14526  }
14527  case SCATTER: {
14528  //scatter(base, mask, index, v1, scale);
14529  SDValue Chain = Op.getOperand(0);
14530  SDValue Base = Op.getOperand(2);
14531  SDValue Mask = Op.getOperand(3);
14532  SDValue Index = Op.getOperand(4);
14533  SDValue Src = Op.getOperand(5);
14534  SDValue Scale = Op.getOperand(6);
14535  return getScatterNode(Intr.Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain);
14536  }
14537  case PREFETCH: {
14538  SDValue Hint = Op.getOperand(6);
14539  unsigned HintVal;
14540  if (dyn_cast<ConstantSDNode> (Hint) == nullptr ||
14541  (HintVal = dyn_cast<ConstantSDNode> (Hint)->getZExtValue()) > 1)
14542  llvm_unreachable("Wrong prefetch hint in intrinsic: should be 0 or 1");
14543  unsigned Opcode = (HintVal ? Intr.Opc1 : Intr.Opc0);
14544  SDValue Chain = Op.getOperand(0);
14545  SDValue Mask = Op.getOperand(2);
14546  SDValue Index = Op.getOperand(3);
14547  SDValue Base = Op.getOperand(4);
14548  SDValue Scale = Op.getOperand(5);
14549  return getPrefetchNode(Opcode, Op, DAG, Mask, Base, Index, Scale, Chain);
14550  }
14551  // Read Time Stamp Counter (RDTSC) and Processor ID (RDTSCP).
14552  case RDTSC: {
14553  SmallVector<SDValue, 2> Results;
14554  getReadTimeStampCounter(Op.getNode(), dl, Intr.Opc0, DAG, Subtarget, Results);
14555  return DAG.getMergeValues(Results, dl);
14556  }
14557  // Read Performance Monitoring Counters.
14558  case RDPMC: {
14559  SmallVector<SDValue, 2> Results;
14560  getReadPerformanceCounter(Op.getNode(), dl, DAG, Subtarget, Results);
14561  return DAG.getMergeValues(Results, dl);
14562  }
14563  // XTEST intrinsics.
14564  case XTEST: {
14565  SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
14566  SDValue InTrans = DAG.getNode(X86ISD::XTEST, dl, VTs, Op.getOperand(0));
14567  SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
14568  DAG.getConstant(X86::COND_NE, MVT::i8),
14569  InTrans);
14570  SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC);
14571  return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
14572  Ret, SDValue(InTrans.getNode(), 1));
14573  }
14574  }
14575  llvm_unreachable("Unknown Intrinsic Type");
14576 }
static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain)
static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget *Subtarget)
SDVTList getVTList() const
SDValue getMergeValues(ArrayRef< SDValue > Ops, SDLoc dl)
getMergeValues - Create a MERGE_VALUES node from the given operands.
aarch64 collect AArch64 Collect Linker Optimization Hint(LOH)"
static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain)
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
EVT getValueType(unsigned ResNo) const
SDVTList getVTList(EVT VT)
static void getReadTimeStampCounter(SDNode *N, SDLoc DL, unsigned Opcode, SelectionDAG &DAG, const X86Subtarget *Subtarget, SmallVectorImpl< SDValue > &Results)
SDNode * getNode() const
get the SDNode which holds the desired result
const SDValue & getOperand(unsigned i) const
IntrinsicType Type
std::map< unsigned, IntrinsicData > IntrMap
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:362
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static void getReadPerformanceCounter(SDNode *N, SDLoc DL, SelectionDAG &DAG, const X86Subtarget *Subtarget, SmallVectorImpl< SDValue > &Results)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT)
static void InitIntinsicsMap()
static SDValue LowerINTRINSIC_WO_CHAIN ( SDValue  Op,
SelectionDAG DAG 
)
static

Definition at line 13486 of file X86ISelLowering.cpp.

13486  {
13487  SDLoc dl(Op);
13488  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
13489  switch (IntNo) {
13490  default: return SDValue(); // Don't custom lower most intrinsics.
13491  // Comparison intrinsics.
13492  case Intrinsic::x86_sse_comieq_ss:
13493  case Intrinsic::x86_sse_comilt_ss:
13494  case Intrinsic::x86_sse_comile_ss:
13495  case Intrinsic::x86_sse_comigt_ss:
13496  case Intrinsic::x86_sse_comige_ss:
13497  case Intrinsic::x86_sse_comineq_ss:
13498  case Intrinsic::x86_sse_ucomieq_ss:
13499  case Intrinsic::x86_sse_ucomilt_ss:
13500  case Intrinsic::x86_sse_ucomile_ss:
13501  case Intrinsic::x86_sse_ucomigt_ss:
13502  case Intrinsic::x86_sse_ucomige_ss:
13503  case Intrinsic::x86_sse_ucomineq_ss:
13504  case Intrinsic::x86_sse2_comieq_sd:
13505  case Intrinsic::x86_sse2_comilt_sd:
13506  case Intrinsic::x86_sse2_comile_sd:
13507  case Intrinsic::x86_sse2_comigt_sd:
13508  case Intrinsic::x86_sse2_comige_sd:
13509  case Intrinsic::x86_sse2_comineq_sd:
13510  case Intrinsic::x86_sse2_ucomieq_sd:
13511  case Intrinsic::x86_sse2_ucomilt_sd:
13512  case Intrinsic::x86_sse2_ucomile_sd:
13513  case Intrinsic::x86_sse2_ucomigt_sd:
13514  case Intrinsic::x86_sse2_ucomige_sd:
13515  case Intrinsic::x86_sse2_ucomineq_sd: {
13516  unsigned Opc;
13517  ISD::CondCode CC;
13518  switch (IntNo) {
13519  default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
13520  case Intrinsic::x86_sse_comieq_ss:
13521  case Intrinsic::x86_sse2_comieq_sd:
13522  Opc = X86ISD::COMI;
13523  CC = ISD::SETEQ;
13524  break;
13525  case Intrinsic::x86_sse_comilt_ss:
13526  case Intrinsic::x86_sse2_comilt_sd:
13527  Opc = X86ISD::COMI;
13528  CC = ISD::SETLT;
13529  break;
13530  case Intrinsic::x86_sse_comile_ss:
13531  case Intrinsic::x86_sse2_comile_sd:
13532  Opc = X86ISD::COMI;
13533  CC = ISD::SETLE;
13534  break;
13535  case Intrinsic::x86_sse_comigt_ss:
13536  case Intrinsic::x86_sse2_comigt_sd:
13537  Opc = X86ISD::COMI;
13538  CC = ISD::SETGT;
13539  break;
13540  case Intrinsic::x86_sse_comige_ss:
13541  case Intrinsic::x86_sse2_comige_sd:
13542  Opc = X86ISD::COMI;
13543  CC = ISD::SETGE;
13544  break;
13545  case Intrinsic::x86_sse_comineq_ss:
13546  case Intrinsic::x86_sse2_comineq_sd:
13547  Opc = X86ISD::COMI;
13548  CC = ISD::SETNE;
13549  break;
13550  case Intrinsic::x86_sse_ucomieq_ss:
13551  case Intrinsic::x86_sse2_ucomieq_sd:
13552  Opc = X86ISD::UCOMI;
13553  CC = ISD::SETEQ;
13554  break;
13555  case Intrinsic::x86_sse_ucomilt_ss:
13556  case Intrinsic::x86_sse2_ucomilt_sd:
13557  Opc = X86ISD::UCOMI;
13558  CC = ISD::SETLT;
13559  break;
13560  case Intrinsic::x86_sse_ucomile_ss:
13561  case Intrinsic::x86_sse2_ucomile_sd:
13562  Opc = X86ISD::UCOMI;
13563  CC = ISD::SETLE;
13564  break;
13565  case Intrinsic::x86_sse_ucomigt_ss:
13566  case Intrinsic::x86_sse2_ucomigt_sd:
13567  Opc = X86ISD::UCOMI;
13568  CC = ISD::SETGT;
13569  break;
13570  case Intrinsic::x86_sse_ucomige_ss:
13571  case Intrinsic::x86_sse2_ucomige_sd:
13572  Opc = X86ISD::UCOMI;
13573  CC = ISD::SETGE;
13574  break;
13575  case Intrinsic::x86_sse_ucomineq_ss:
13576  case Intrinsic::x86_sse2_ucomineq_sd:
13577  Opc = X86ISD::UCOMI;
13578  CC = ISD::SETNE;
13579  break;
13580  }
13581 
13582  SDValue LHS = Op.getOperand(1);
13583  SDValue RHS = Op.getOperand(2);
13584  unsigned X86CC = TranslateX86CC(CC, true, LHS, RHS, DAG);
13585  assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!");
13586  SDValue Cond = DAG.getNode(Opc, dl, MVT::i32, LHS, RHS);
13587  SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
13588  DAG.getConstant(X86CC, MVT::i8), Cond);
13589  return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
13590  }
13591 
13592  // Arithmetic intrinsics.
13593  case Intrinsic::x86_sse2_pmulu_dq:
13594  case Intrinsic::x86_avx2_pmulu_dq:
13595  return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(),
13596  Op.getOperand(1), Op.getOperand(2));
13597 
13598  case Intrinsic::x86_sse41_pmuldq:
13599  case Intrinsic::x86_avx2_pmul_dq:
13600  return DAG.getNode(X86ISD::PMULDQ, dl, Op.getValueType(),
13601  Op.getOperand(1), Op.getOperand(2));
13602 
13603  case Intrinsic::x86_sse2_pmulhu_w:
13604  case Intrinsic::x86_avx2_pmulhu_w:
13605  return DAG.getNode(ISD::MULHU, dl, Op.getValueType(),
13606  Op.getOperand(1), Op.getOperand(2));
13607 
13608  case Intrinsic::x86_sse2_pmulh_w:
13609  case Intrinsic::x86_avx2_pmulh_w:
13610  return DAG.getNode(ISD::MULHS, dl, Op.getValueType(),
13611  Op.getOperand(1), Op.getOperand(2));
13612 
13613  // SSE2/AVX2 sub with unsigned saturation intrinsics
13614  case Intrinsic::x86_sse2_psubus_b:
13615  case Intrinsic::x86_sse2_psubus_w:
13616  case Intrinsic::x86_avx2_psubus_b:
13617  case Intrinsic::x86_avx2_psubus_w:
13618  return DAG.getNode(X86ISD::SUBUS, dl, Op.getValueType(),
13619  Op.getOperand(1), Op.getOperand(2));
13620 
13621  // SSE3/AVX horizontal add/sub intrinsics
13622  case Intrinsic::x86_sse3_hadd_ps:
13623  case Intrinsic::x86_sse3_hadd_pd:
13624  case Intrinsic::x86_avx_hadd_ps_256:
13625  case Intrinsic::x86_avx_hadd_pd_256:
13626  case Intrinsic::x86_sse3_hsub_ps:
13627  case Intrinsic::x86_sse3_hsub_pd:
13628  case Intrinsic::x86_avx_hsub_ps_256:
13629  case Intrinsic::x86_avx_hsub_pd_256:
13630  case Intrinsic::x86_ssse3_phadd_w_128:
13631  case Intrinsic::x86_ssse3_phadd_d_128:
13632  case Intrinsic::x86_avx2_phadd_w:
13633  case Intrinsic::x86_avx2_phadd_d:
13634  case Intrinsic::x86_ssse3_phsub_w_128:
13635  case Intrinsic::x86_ssse3_phsub_d_128:
13636  case Intrinsic::x86_avx2_phsub_w:
13637  case Intrinsic::x86_avx2_phsub_d: {
13638  unsigned Opcode;
13639  switch (IntNo) {
13640  default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
13641  case Intrinsic::x86_sse3_hadd_ps:
13642  case Intrinsic::x86_sse3_hadd_pd:
13643  case Intrinsic::x86_avx_hadd_ps_256:
13644  case Intrinsic::x86_avx_hadd_pd_256:
13645  Opcode = X86ISD::FHADD;
13646  break;
13647  case Intrinsic::x86_sse3_hsub_ps:
13648  case Intrinsic::x86_sse3_hsub_pd:
13649  case Intrinsic::x86_avx_hsub_ps_256:
13650  case Intrinsic::x86_avx_hsub_pd_256:
13651  Opcode = X86ISD::FHSUB;
13652  break;
13653  case Intrinsic::x86_ssse3_phadd_w_128:
13654  case Intrinsic::x86_ssse3_phadd_d_128:
13655  case Intrinsic::x86_avx2_phadd_w:
13656  case Intrinsic::x86_avx2_phadd_d:
13657  Opcode = X86ISD::HADD;
13658  break;
13659  case Intrinsic::x86_ssse3_phsub_w_128:
13660  case Intrinsic::x86_ssse3_phsub_d_128:
13661  case Intrinsic::x86_avx2_phsub_w:
13662  case Intrinsic::x86_avx2_phsub_d:
13663  Opcode = X86ISD::HSUB;
13664  break;
13665  }
13666  return DAG.getNode(Opcode, dl, Op.getValueType(),
13667  Op.getOperand(1), Op.getOperand(2));
13668  }
13669 
13670  // SSE2/SSE41/AVX2 integer max/min intrinsics.
13671  case Intrinsic::x86_sse2_pmaxu_b:
13672  case Intrinsic::x86_sse41_pmaxuw:
13673  case Intrinsic::x86_sse41_pmaxud:
13674  case Intrinsic::x86_avx2_pmaxu_b:
13675  case Intrinsic::x86_avx2_pmaxu_w:
13676  case Intrinsic::x86_avx2_pmaxu_d:
13677  case Intrinsic::x86_sse2_pminu_b:
13678  case Intrinsic::x86_sse41_pminuw:
13679  case Intrinsic::x86_sse41_pminud:
13680  case Intrinsic::x86_avx2_pminu_b:
13681  case Intrinsic::x86_avx2_pminu_w:
13682  case Intrinsic::x86_avx2_pminu_d:
13683  case Intrinsic::x86_sse41_pmaxsb:
13684  case Intrinsic::x86_sse2_pmaxs_w:
13685  case Intrinsic::x86_sse41_pmaxsd:
13686  case Intrinsic::x86_avx2_pmaxs_b:
13687  case Intrinsic::x86_avx2_pmaxs_w:
13688  case Intrinsic::x86_avx2_pmaxs_d:
13689  case Intrinsic::x86_sse41_pminsb:
13690  case Intrinsic::x86_sse2_pmins_w:
13691  case Intrinsic::x86_sse41_pminsd:
13692  case Intrinsic::x86_avx2_pmins_b:
13693  case Intrinsic::x86_avx2_pmins_w:
13694  case Intrinsic::x86_avx2_pmins_d: {
13695  unsigned Opcode;
13696  switch (IntNo) {
13697  default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
13698  case Intrinsic::x86_sse2_pmaxu_b:
13699  case Intrinsic::x86_sse41_pmaxuw:
13700  case Intrinsic::x86_sse41_pmaxud:
13701  case Intrinsic::x86_avx2_pmaxu_b:
13702  case Intrinsic::x86_avx2_pmaxu_w:
13703  case Intrinsic::x86_avx2_pmaxu_d:
13704  Opcode = X86ISD::UMAX;
13705  break;
13706  case Intrinsic::x86_sse2_pminu_b:
13707  case Intrinsic::x86_sse41_pminuw:
13708  case Intrinsic::x86_sse41_pminud:
13709  case Intrinsic::x86_avx2_pminu_b:
13710  case Intrinsic::x86_avx2_pminu_w:
13711  case Intrinsic::x86_avx2_pminu_d:
13712  Opcode = X86ISD::UMIN;
13713  break;
13714  case Intrinsic::x86_sse41_pmaxsb:
13715  case Intrinsic::x86_sse2_pmaxs_w:
13716  case Intrinsic::x86_sse41_pmaxsd:
13717  case Intrinsic::x86_avx2_pmaxs_b:
13718  case Intrinsic::x86_avx2_pmaxs_w:
13719  case Intrinsic::x86_avx2_pmaxs_d:
13720  Opcode = X86ISD::SMAX;
13721  break;
13722  case Intrinsic::x86_sse41_pminsb:
13723  case Intrinsic::x86_sse2_pmins_w:
13724  case Intrinsic::x86_sse41_pminsd:
13725  case Intrinsic::x86_avx2_pmins_b:
13726  case Intrinsic::x86_avx2_pmins_w:
13727  case Intrinsic::x86_avx2_pmins_d:
13728  Opcode = X86ISD::SMIN;
13729  break;
13730  }
13731  return DAG.getNode(Opcode, dl, Op.getValueType(),
13732  Op.getOperand(1), Op.getOperand(2));
13733  }
13734 
13735  // SSE/SSE2/AVX floating point max/min intrinsics.
13736  case Intrinsic::x86_sse_max_ps:
13737  case Intrinsic::x86_sse2_max_pd:
13738  case Intrinsic::x86_avx_max_ps_256:
13739  case Intrinsic::x86_avx_max_pd_256:
13740  case Intrinsic::x86_sse_min_ps:
13741  case Intrinsic::x86_sse2_min_pd:
13742  case Intrinsic::x86_avx_min_ps_256:
13743  case Intrinsic::x86_avx_min_pd_256: {
13744  unsigned Opcode;
13745  switch (IntNo) {
13746  default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
13747  case Intrinsic::x86_sse_max_ps:
13748  case Intrinsic::x86_sse2_max_pd:
13749  case Intrinsic::x86_avx_max_ps_256:
13750  case Intrinsic::x86_avx_max_pd_256:
13751  Opcode = X86ISD::FMAX;
13752  break;
13753  case Intrinsic::x86_sse_min_ps:
13754  case Intrinsic::x86_sse2_min_pd:
13755  case Intrinsic::x86_avx_min_ps_256:
13756  case Intrinsic::x86_avx_min_pd_256:
13757  Opcode = X86ISD::FMIN;
13758  break;
13759  }
13760  return DAG.getNode(Opcode, dl, Op.getValueType(),
13761  Op.getOperand(1), Op.getOperand(2));
13762  }
13763 
13764  // AVX2 variable shift intrinsics
13765  case Intrinsic::x86_avx2_psllv_d:
13766  case Intrinsic::x86_avx2_psllv_q:
13767  case Intrinsic::x86_avx2_psllv_d_256:
13768  case Intrinsic::x86_avx2_psllv_q_256:
13769  case Intrinsic::x86_avx2_psrlv_d:
13770  case Intrinsic::x86_avx2_psrlv_q:
13771  case Intrinsic::x86_avx2_psrlv_d_256:
13772  case Intrinsic::x86_avx2_psrlv_q_256:
13773  case Intrinsic::x86_avx2_psrav_d:
13774  case Intrinsic::x86_avx2_psrav_d_256: {
13775  unsigned Opcode;
13776  switch (IntNo) {
13777  default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
13778  case Intrinsic::x86_avx2_psllv_d:
13779  case Intrinsic::x86_avx2_psllv_q:
13780  case Intrinsic::x86_avx2_psllv_d_256:
13781  case Intrinsic::x86_avx2_psllv_q_256:
13782  Opcode = ISD::SHL;
13783  break;
13784  case Intrinsic::x86_avx2_psrlv_d:
13785  case Intrinsic::x86_avx2_psrlv_q:
13786  case Intrinsic::x86_avx2_psrlv_d_256:
13787  case Intrinsic::x86_avx2_psrlv_q_256:
13788  Opcode = ISD::SRL;
13789  break;
13790  case Intrinsic::x86_avx2_psrav_d:
13791  case Intrinsic::x86_avx2_psrav_d_256:
13792  Opcode = ISD::SRA;
13793  break;
13794  }
13795  return DAG.getNode(Opcode, dl, Op.getValueType(),
13796  Op.getOperand(1), Op.getOperand(2));
13797  }
13798 
13799  case Intrinsic::x86_sse2_packssdw_128:
13800  case Intrinsic::x86_sse2_packsswb_128:
13801  case Intrinsic::x86_avx2_packssdw:
13802  case Intrinsic::x86_avx2_packsswb:
13803  return DAG.getNode(X86ISD::PACKSS, dl, Op.getValueType(),
13804  Op.getOperand(1), Op.getOperand(2));
13805 
13806  case Intrinsic::x86_sse2_packuswb_128:
13807  case Intrinsic::x86_sse41_packusdw:
13808  case Intrinsic::x86_avx2_packuswb:
13809  case Intrinsic::x86_avx2_packusdw:
13810  return DAG.getNode(X86ISD::PACKUS, dl, Op.getValueType(),
13811  Op.getOperand(1), Op.getOperand(2));
13812 
13813  case Intrinsic::x86_ssse3_pshuf_b_128:
13814  case Intrinsic::x86_avx2_pshuf_b:
13815  return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(),
13816  Op.getOperand(1), Op.getOperand(2));
13817 
13818  case Intrinsic::x86_sse2_pshuf_d:
13819  return DAG.getNode(X86ISD::PSHUFD, dl, Op.getValueType(),
13820  Op.getOperand(1), Op.getOperand(2));
13821 
13822  case Intrinsic::x86_sse2_pshufl_w:
13823  return DAG.getNode(X86ISD::PSHUFLW, dl, Op.getValueType(),
13824  Op.getOperand(1), Op.getOperand(2));
13825 
13826  case Intrinsic::x86_sse2_pshufh_w:
13827  return DAG.getNode(X86ISD::PSHUFHW, dl, Op.getValueType(),
13828  Op.getOperand(1), Op.getOperand(2));
13829 
13830  case Intrinsic::x86_ssse3_psign_b_128:
13831  case Intrinsic::x86_ssse3_psign_w_128:
13832  case Intrinsic::x86_ssse3_psign_d_128:
13833  case Intrinsic::x86_avx2_psign_b:
13834  case Intrinsic::x86_avx2_psign_w:
13835  case Intrinsic::x86_avx2_psign_d:
13836  return DAG.getNode(X86ISD::PSIGN, dl, Op.getValueType(),
13837  Op.getOperand(1), Op.getOperand(2));
13838 
13839  case Intrinsic::x86_sse41_insertps:
13840  return DAG.getNode(X86ISD::INSERTPS, dl, Op.getValueType(),
13841  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
13842 
13843  case Intrinsic::x86_avx_vperm2f128_ps_256:
13844  case Intrinsic::x86_avx_vperm2f128_pd_256:
13845  case Intrinsic::x86_avx_vperm2f128_si_256:
13846  case Intrinsic::x86_avx2_vperm2i128:
13847  return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(),
13848  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
13849 
13850  case Intrinsic::x86_avx2_permd:
13851  case Intrinsic::x86_avx2_permps:
13852  // Operands intentionally swapped. Mask is last operand to intrinsic,
13853  // but second operand for node/instruction.
13854  return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
13855  Op.getOperand(2), Op.getOperand(1));
13856 
13857  case Intrinsic::x86_sse_sqrt_ps:
13858  case Intrinsic::x86_sse2_sqrt_pd:
13859  case Intrinsic::x86_avx_sqrt_ps_256:
13860  case Intrinsic::x86_avx_sqrt_pd_256:
13861  return DAG.getNode(ISD::FSQRT, dl, Op.getValueType(), Op.getOperand(1));
13862 
 13863  // ptest and testp intrinsics. The intrinsics these come from are designed to
 13864  // return an integer value, not just an instruction so lower it to the ptest
 13865  // or testp pattern and a setcc for the result.
13866  case Intrinsic::x86_sse41_ptestz:
13867  case Intrinsic::x86_sse41_ptestc:
13868  case Intrinsic::x86_sse41_ptestnzc:
13869  case Intrinsic::x86_avx_ptestz_256:
13870  case Intrinsic::x86_avx_ptestc_256:
13871  case Intrinsic::x86_avx_ptestnzc_256:
13872  case Intrinsic::x86_avx_vtestz_ps:
13873  case Intrinsic::x86_avx_vtestc_ps:
13874  case Intrinsic::x86_avx_vtestnzc_ps:
13875  case Intrinsic::x86_avx_vtestz_pd:
13876  case Intrinsic::x86_avx_vtestc_pd:
13877  case Intrinsic::x86_avx_vtestnzc_pd:
13878  case Intrinsic::x86_avx_vtestz_ps_256:
13879  case Intrinsic::x86_avx_vtestc_ps_256:
13880  case Intrinsic::x86_avx_vtestnzc_ps_256:
13881  case Intrinsic::x86_avx_vtestz_pd_256:
13882  case Intrinsic::x86_avx_vtestc_pd_256:
13883  case Intrinsic::x86_avx_vtestnzc_pd_256: {
13884  bool IsTestPacked = false;
13885  unsigned X86CC;
13886  switch (IntNo) {
13887  default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
13888  case Intrinsic::x86_avx_vtestz_ps:
13889  case Intrinsic::x86_avx_vtestz_pd:
13890  case Intrinsic::x86_avx_vtestz_ps_256:
13891  case Intrinsic::x86_avx_vtestz_pd_256:
13892  IsTestPacked = true; // Fallthrough
13893  case Intrinsic::x86_sse41_ptestz:
13894  case Intrinsic::x86_avx_ptestz_256:
13895  // ZF = 1
13896  X86CC = X86::COND_E;
13897  break;
13898  case Intrinsic::x86_avx_vtestc_ps:
13899  case Intrinsic::x86_avx_vtestc_pd:
13900  case Intrinsic::x86_avx_vtestc_ps_256:
13901  case Intrinsic::x86_avx_vtestc_pd_256:
13902  IsTestPacked = true; // Fallthrough
13903  case Intrinsic::x86_sse41_ptestc:
13904  case Intrinsic::x86_avx_ptestc_256:
13905  // CF = 1
13906  X86CC = X86::COND_B;
13907  break;
13908  case Intrinsic::x86_avx_vtestnzc_ps:
13909  case Intrinsic::x86_avx_vtestnzc_pd:
13910  case Intrinsic::x86_avx_vtestnzc_ps_256:
13911  case Intrinsic::x86_avx_vtestnzc_pd_256:
13912  IsTestPacked = true; // Fallthrough
13913  case Intrinsic::x86_sse41_ptestnzc:
13914  case Intrinsic::x86_avx_ptestnzc_256:
13915  // ZF and CF = 0
13916  X86CC = X86::COND_A;
13917  break;
13918  }
13919 
13920  SDValue LHS = Op.getOperand(1);
13921  SDValue RHS = Op.getOperand(2);
13922  unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
13923  SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
13924  SDValue CC = DAG.getConstant(X86CC, MVT::i8);
13925  SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
13926  return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
13927  }
13928  case Intrinsic::x86_avx512_kortestz_w:
13929  case Intrinsic::x86_avx512_kortestc_w: {
13930  unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz_w)? X86::COND_E: X86::COND_B;
13931  SDValue LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(1));
13932  SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2));
13933  SDValue CC = DAG.getConstant(X86CC, MVT::i8);
13934  SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
13935  SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i1, CC, Test);
13936  return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
13937  }
13938 
13939  // SSE/AVX shift intrinsics
13940  case Intrinsic::x86_sse2_psll_w:
13941  case Intrinsic::x86_sse2_psll_d:
13942  case Intrinsic::x86_sse2_psll_q:
13943  case Intrinsic::x86_avx2_psll_w:
13944  case Intrinsic::x86_avx2_psll_d:
13945  case Intrinsic::x86_avx2_psll_q:
13946  case Intrinsic::x86_sse2_psrl_w:
13947  case Intrinsic::x86_sse2_psrl_d:
13948  case Intrinsic::x86_sse2_psrl_q:
13949  case Intrinsic::x86_avx2_psrl_w:
13950  case Intrinsic::x86_avx2_psrl_d:
13951  case Intrinsic::x86_avx2_psrl_q:
13952  case Intrinsic::x86_sse2_psra_w:
13953  case Intrinsic::x86_sse2_psra_d:
13954  case Intrinsic::x86_avx2_psra_w:
13955  case Intrinsic::x86_avx2_psra_d: {
13956  unsigned Opcode;
13957  switch (IntNo) {
13958  default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
13959  case Intrinsic::x86_sse2_psll_w:
13960  case Intrinsic::x86_sse2_psll_d:
13961  case Intrinsic::x86_sse2_psll_q:
13962  case Intrinsic::x86_avx2_psll_w:
13963  case Intrinsic::x86_avx2_psll_d:
13964  case Intrinsic::x86_avx2_psll_q:
13965  Opcode = X86ISD::VSHL;
13966  break;
13967  case Intrinsic::x86_sse2_psrl_w:
13968  case Intrinsic::x86_sse2_psrl_d:
13969  case Intrinsic::x86_sse2_psrl_q:
13970  case Intrinsic::x86_avx2_psrl_w:
13971  case Intrinsic::x86_avx2_psrl_d:
13972  case Intrinsic::x86_avx2_psrl_q:
13973  Opcode = X86ISD::VSRL;
13974  break;
13975  case Intrinsic::x86_sse2_psra_w:
13976  case Intrinsic::x86_sse2_psra_d:
13977  case Intrinsic::x86_avx2_psra_w:
13978  case Intrinsic::x86_avx2_psra_d:
13979  Opcode = X86ISD::VSRA;
13980  break;
13981  }
13982  return DAG.getNode(Opcode, dl, Op.getValueType(),
13983  Op.getOperand(1), Op.getOperand(2));
13984  }
13985 
13986  // SSE/AVX immediate shift intrinsics
13987  case Intrinsic::x86_sse2_pslli_w:
13988  case Intrinsic::x86_sse2_pslli_d:
13989  case Intrinsic::x86_sse2_pslli_q:
13990  case Intrinsic::x86_avx2_pslli_w:
13991  case Intrinsic::x86_avx2_pslli_d:
13992  case Intrinsic::x86_avx2_pslli_q:
13993  case Intrinsic::x86_sse2_psrli_w:
13994  case Intrinsic::x86_sse2_psrli_d:
13995  case Intrinsic::x86_sse2_psrli_q:
13996  case Intrinsic::x86_avx2_psrli_w:
13997  case Intrinsic::x86_avx2_psrli_d:
13998  case Intrinsic::x86_avx2_psrli_q:
13999  case Intrinsic::x86_sse2_psrai_w:
14000  case Intrinsic::x86_sse2_psrai_d:
14001  case Intrinsic::x86_avx2_psrai_w:
14002  case Intrinsic::x86_avx2_psrai_d: {
14003  unsigned Opcode;
14004  switch (IntNo) {
14005  default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
14006  case Intrinsic::x86_sse2_pslli_w:
14007  case Intrinsic::x86_sse2_pslli_d:
14008  case Intrinsic::x86_sse2_pslli_q:
14009  case Intrinsic::x86_avx2_pslli_w:
14010  case Intrinsic::x86_avx2_pslli_d:
14011  case Intrinsic::x86_avx2_pslli_q:
14012  Opcode = X86ISD::VSHLI;
14013  break;
14014  case Intrinsic::x86_sse2_psrli_w:
14015  case Intrinsic::x86_sse2_psrli_d:
14016  case Intrinsic::x86_sse2_psrli_q:
14017  case Intrinsic::x86_avx2_psrli_w:
14018  case Intrinsic::x86_avx2_psrli_d:
14019  case Intrinsic::x86_avx2_psrli_q:
14020  Opcode = X86ISD::VSRLI;
14021  break;
14022  case Intrinsic::x86_sse2_psrai_w:
14023  case Intrinsic::x86_sse2_psrai_d:
14024  case Intrinsic::x86_avx2_psrai_w:
14025  case Intrinsic::x86_avx2_psrai_d:
14026  Opcode = X86ISD::VSRAI;
14027  break;
14028  }
14029  return getTargetVShiftNode(Opcode, dl, Op.getSimpleValueType(),
14030  Op.getOperand(1), Op.getOperand(2), DAG);
14031  }
14032 
14033  case Intrinsic::x86_sse42_pcmpistria128:
14034  case Intrinsic::x86_sse42_pcmpestria128:
14035  case Intrinsic::x86_sse42_pcmpistric128:
14036  case Intrinsic::x86_sse42_pcmpestric128:
14037  case Intrinsic::x86_sse42_pcmpistrio128:
14038  case Intrinsic::x86_sse42_pcmpestrio128:
14039  case Intrinsic::x86_sse42_pcmpistris128:
14040  case Intrinsic::x86_sse42_pcmpestris128:
14041  case Intrinsic::x86_sse42_pcmpistriz128:
14042  case Intrinsic::x86_sse42_pcmpestriz128: {
14043  unsigned Opcode;
14044  unsigned X86CC;
14045  switch (IntNo) {
14046  default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
14047  case Intrinsic::x86_sse42_pcmpistria128:
14048  Opcode = X86ISD::PCMPISTRI;
14049  X86CC = X86::COND_A;
14050  break;
14051  case Intrinsic::x86_sse42_pcmpestria128:
14052  Opcode = X86ISD::PCMPESTRI;
14053  X86CC = X86::COND_A;
14054  break;
14055  case Intrinsic::x86_sse42_pcmpistric128:
14056  Opcode = X86ISD::PCMPISTRI;
14057  X86CC = X86::COND_B;
14058  break;
14059  case Intrinsic::x86_sse42_pcmpestric128:
14060  Opcode = X86ISD::PCMPESTRI;
14061  X86CC = X86::COND_B;
14062  break;
14063  case Intrinsic::x86_sse42_pcmpistrio128:
14064  Opcode = X86ISD::PCMPISTRI;
14065  X86CC = X86::COND_O;
14066  break;
14067  case Intrinsic::x86_sse42_pcmpestrio128:
14068  Opcode = X86ISD::PCMPESTRI;
14069  X86CC = X86::COND_O;
14070  break;
14071  case Intrinsic::x86_sse42_pcmpistris128:
14072  Opcode = X86ISD::PCMPISTRI;
14073  X86CC = X86::COND_S;
14074  break;
14075  case Intrinsic::x86_sse42_pcmpestris128:
14076  Opcode = X86ISD::PCMPESTRI;
14077  X86CC = X86::COND_S;
14078  break;
14079  case Intrinsic::x86_sse42_pcmpistriz128:
14080  Opcode = X86ISD::PCMPISTRI;
14081  X86CC = X86::COND_E;
14082  break;
14083  case Intrinsic::x86_sse42_pcmpestriz128:
14084  Opcode = X86ISD::PCMPESTRI;
14085  X86CC = X86::COND_E;
14086  break;
14087  }
14088  SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end());
14089  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
14090  SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps);
14091  SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
14092  DAG.getConstant(X86CC, MVT::i8),
14093  SDValue(PCMP.getNode(), 1));
14094  return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
14095  }
14096 
14097  case Intrinsic::x86_sse42_pcmpistri128:
14098  case Intrinsic::x86_sse42_pcmpestri128: {
14099  unsigned Opcode;
14100  if (IntNo == Intrinsic::x86_sse42_pcmpistri128)
14101  Opcode = X86ISD::PCMPISTRI;
14102  else
14103  Opcode = X86ISD::PCMPESTRI;
14104 
14105  SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end());
14106  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
14107  return DAG.getNode(Opcode, dl, VTs, NewOps);
14108  }
14109  case Intrinsic::x86_fma_vfmadd_ps:
14110  case Intrinsic::x86_fma_vfmadd_pd:
14111  case Intrinsic::x86_fma_vfmsub_ps:
14112  case Intrinsic::x86_fma_vfmsub_pd:
14113  case Intrinsic::x86_fma_vfnmadd_ps:
14114  case Intrinsic::x86_fma_vfnmadd_pd:
14115  case Intrinsic::x86_fma_vfnmsub_ps:
14116  case Intrinsic::x86_fma_vfnmsub_pd:
14117  case Intrinsic::x86_fma_vfmaddsub_ps:
14118  case Intrinsic::x86_fma_vfmaddsub_pd:
14119  case Intrinsic::x86_fma_vfmsubadd_ps:
14120  case Intrinsic::x86_fma_vfmsubadd_pd:
14121  case Intrinsic::x86_fma_vfmadd_ps_256:
14122  case Intrinsic::x86_fma_vfmadd_pd_256:
14123  case Intrinsic::x86_fma_vfmsub_ps_256:
14124  case Intrinsic::x86_fma_vfmsub_pd_256:
14125  case Intrinsic::x86_fma_vfnmadd_ps_256:
14126  case Intrinsic::x86_fma_vfnmadd_pd_256:
14127  case Intrinsic::x86_fma_vfnmsub_ps_256:
14128  case Intrinsic::x86_fma_vfnmsub_pd_256:
14129  case Intrinsic::x86_fma_vfmaddsub_ps_256:
14130  case Intrinsic::x86_fma_vfmaddsub_pd_256:
14131  case Intrinsic::x86_fma_vfmsubadd_ps_256:
14132  case Intrinsic::x86_fma_vfmsubadd_pd_256:
14133  case Intrinsic::x86_fma_vfmadd_ps_512:
14134  case Intrinsic::x86_fma_vfmadd_pd_512:
14135  case Intrinsic::x86_fma_vfmsub_ps_512:
14136  case Intrinsic::x86_fma_vfmsub_pd_512:
14137  case Intrinsic::x86_fma_vfnmadd_ps_512:
14138  case Intrinsic::x86_fma_vfnmadd_pd_512:
14139  case Intrinsic::x86_fma_vfnmsub_ps_512:
14140  case Intrinsic::x86_fma_vfnmsub_pd_512:
14141  case Intrinsic::x86_fma_vfmaddsub_ps_512:
14142  case Intrinsic::x86_fma_vfmaddsub_pd_512:
14143  case Intrinsic::x86_fma_vfmsubadd_ps_512:
14144  case Intrinsic::x86_fma_vfmsubadd_pd_512: {
14145  unsigned Opc;
14146  switch (IntNo) {
14147  default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
14148  case Intrinsic::x86_fma_vfmadd_ps:
14149  case Intrinsic::x86_fma_vfmadd_pd:
14150  case Intrinsic::x86_fma_vfmadd_ps_256:
14151  case Intrinsic::x86_fma_vfmadd_pd_256:
14152  case Intrinsic::x86_fma_vfmadd_ps_512:
14153  case Intrinsic::x86_fma_vfmadd_pd_512:
14154  Opc = X86ISD::FMADD;
14155  break;
14156  case Intrinsic::x86_fma_vfmsub_ps:
14157  case Intrinsic::x86_fma_vfmsub_pd:
14158  case Intrinsic::x86_fma_vfmsub_ps_256:
14159  case Intrinsic::x86_fma_vfmsub_pd_256:
14160  case Intrinsic::x86_fma_vfmsub_ps_512:
14161  case Intrinsic::x86_fma_vfmsub_pd_512:
14162  Opc = X86ISD::FMSUB;
14163  break;
14164  case Intrinsic::x86_fma_vfnmadd_ps:
14165  case Intrinsic::x86_fma_vfnmadd_pd:
14166  case Intrinsic::x86_fma_vfnmadd_ps_256:
14167  case Intrinsic::x86_fma_vfnmadd_pd_256:
14168  case Intrinsic::x86_fma_vfnmadd_ps_512:
14169  case Intrinsic::x86_fma_vfnmadd_pd_512:
14170  Opc = X86ISD::FNMADD;
14171  break;
14172  case Intrinsic::x86_fma_vfnmsub_ps:
14173  case Intrinsic::x86_fma_vfnmsub_pd:
14174  case Intrinsic::x86_fma_vfnmsub_ps_256:
14175  case Intrinsic::x86_fma_vfnmsub_pd_256:
14176  case Intrinsic::x86_fma_vfnmsub_ps_512:
14177  case Intrinsic::x86_fma_vfnmsub_pd_512:
14178  Opc = X86ISD::FNMSUB;
14179  break;
14180  case Intrinsic::x86_fma_vfmaddsub_ps:
14181  case Intrinsic::x86_fma_vfmaddsub_pd:
14182  case Intrinsic::x86_fma_vfmaddsub_ps_256:
14183  case Intrinsic::x86_fma_vfmaddsub_pd_256:
14184  case Intrinsic::x86_fma_vfmaddsub_ps_512:
14185  case Intrinsic::x86_fma_vfmaddsub_pd_512:
14186  Opc = X86ISD::FMADDSUB;
14187  break;
14188  case Intrinsic::x86_fma_vfmsubadd_ps:
14189  case Intrinsic::x86_fma_vfmsubadd_pd:
14190  case Intrinsic::x86_fma_vfmsubadd_ps_256:
14191  case Intrinsic::x86_fma_vfmsubadd_pd_256:
14192  case Intrinsic::x86_fma_vfmsubadd_ps_512:
14193  case Intrinsic::x86_fma_vfmsubadd_pd_512:
14194  Opc = X86ISD::FMSUBADD;
14195  break;
14196  }
14197 
14198  return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1),
14199  Op.getOperand(2), Op.getOperand(3));
14200  }
14201  }
14202 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
PSIGN - Copy integer sign.
PSHUFB - Shuffle 16 8-bit values within a vector.
FHSUB - Floating point horizontal sub.
static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP, SDValue &LHS, SDValue &RHS, SelectionDAG &DAG)
HSUB - Integer horizontal sub.
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
SDVTList getVTList(EVT VT)
HADD - Integer horizontal add.
SDNode * getNode() const
get the SDNode which holds the desired result
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, SDValue SrcOp, SDValue ShAmt, SelectionDAG &DAG)
***NAME is the name of the raw_ostream unsigned & i1
op_iterator op_begin() const
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:362
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
op_iterator op_end() const
EVT getValueType() const
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
FHADD - Floating point horizontal add.
static SDValue LowerIntVSETCC_AVX512 ( SDValue  Op,
SelectionDAG DAG,
const X86Subtarget Subtarget 
)
static

Definition at line 12100 of file X86ISelLowering.cpp.

12101  {
12102  SDValue Op0 = Op.getOperand(0);
12103  SDValue Op1 = Op.getOperand(1);
12104  SDValue CC = Op.getOperand(2);
12105  MVT VT = Op.getSimpleValueType();
12106  SDLoc dl(Op);
12107 
12109  Op.getValueType().getScalarType() == MVT::i1 &&
12110  "Cannot set masked compare for this operation");
12111 
12112  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
12113  unsigned Opc = 0;
12114  bool Unsigned = false;
12115  bool Swap = false;
12116  unsigned SSECC;
12117  switch (SetCCOpcode) {
12118  default: llvm_unreachable("Unexpected SETCC condition");
12119  case ISD::SETNE: SSECC = 4; break;
12120  case ISD::SETEQ: Opc = X86ISD::PCMPEQM; break;
12121  case ISD::SETUGT: SSECC = 6; Unsigned = true; break;
12122  case ISD::SETLT: Swap = true; //fall-through
12123  case ISD::SETGT: Opc = X86ISD::PCMPGTM; break;
12124  case ISD::SETULT: SSECC = 1; Unsigned = true; break;
12125  case ISD::SETUGE: SSECC = 5; Unsigned = true; break; //NLT
12126  case ISD::SETGE: Swap = true; SSECC = 2; break; // LE + swap
12127  case ISD::SETULE: Unsigned = true; //fall-through
12128  case ISD::SETLE: SSECC = 2; break;
12129  }
12130 
12131  if (Swap)
12132  std::swap(Op0, Op1);
12133  if (Opc)
12134  return DAG.getNode(Opc, dl, VT, Op0, Op1);
12135  Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM;
12136  return DAG.getNode(Opc, dl, VT, Op0, Op1,
12137  DAG.getConstant(SSECC, MVT::i8));
12138 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
EVT getScalarType() const
Definition: ValueTypes.h:211
EVT getVectorElementType() const
Definition: ValueTypes.h:217
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
***NAME is the name of the raw_ostream unsigned & i1
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:590
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
EVT getValueType() const
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static SDValue LowerLOAD_SUB ( SDValue  Op,
SelectionDAG DAG 
)
static

Definition at line 16088 of file X86ISelLowering.cpp.

16088  {
16089  SDNode *Node = Op.getNode();
16090  SDLoc dl(Node);
16091  EVT T = Node->getValueType(0);
16092  SDValue negOp = DAG.getNode(ISD::SUB, dl, T,
16093  DAG.getConstant(0, T), Node->getOperand(2));
16094  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl,
16095  cast<AtomicSDNode>(Node)->getMemoryVT(),
16096  Node->getOperand(0),
16097  Node->getOperand(1), negOp,
16098  cast<AtomicSDNode>(Node)->getMemOperand(),
16099  cast<AtomicSDNode>(Node)->getOrdering(),
16100  cast<AtomicSDNode>(Node)->getSynchScope());
16101 }
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
const DomTreeNodeT * Node
#define T
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, const Value *PtrVal, unsigned Alignment, AtomicOrdering Ordering, SynchronizationScope SynchScope)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static SDValue LowerMUL ( SDValue  Op,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Definition at line 15034 of file X86ISelLowering.cpp.

15035  {
15036  SDLoc dl(Op);
15037  MVT VT = Op.getSimpleValueType();
15038 
15039  // Decompose 256-bit ops into smaller 128-bit ops.
15040  if (VT.is256BitVector() && !Subtarget->hasInt256())
15041  return Lower256IntArith(Op, DAG);
15042 
15043  SDValue A = Op.getOperand(0);
15044  SDValue B = Op.getOperand(1);
15045 
15046  // Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle.
15047  if (VT == MVT::v4i32) {
15048  assert(Subtarget->hasSSE2() && !Subtarget->hasSSE41() &&
15049  "Should not custom lower when pmuldq is available!");
15050 
15051  // Extract the odd parts.
15052  static const int UnpackMask[] = { 1, -1, 3, -1 };
15053  SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask);
15054  SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask);
15055 
15056  // Multiply the even parts.
15057  SDValue Evens = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, A, B);
15058  // Now multiply odd parts.
15059  SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, Aodds, Bodds);
15060 
15061  Evens = DAG.getNode(ISD::BITCAST, dl, VT, Evens);
15062  Odds = DAG.getNode(ISD::BITCAST, dl, VT, Odds);
15063 
15064  // Merge the two vectors back together with a shuffle. This expands into 2
15065  // shuffles.
15066  static const int ShufMask[] = { 0, 4, 2, 6 };
15067  return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask);
15068  }
15069 
15070  assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) &&
15071  "Only know how to lower V2I64/V4I64/V8I64 multiply");
15072 
15073  // Ahi = psrlqi(a, 32);
15074  // Bhi = psrlqi(b, 32);
15075  //
15076  // AloBlo = pmuludq(a, b);
15077  // AloBhi = pmuludq(a, Bhi);
15078  // AhiBlo = pmuludq(Ahi, b);
15079 
15080  // AloBhi = psllqi(AloBhi, 32);
15081  // AhiBlo = psllqi(AhiBlo, 32);
15082  // return AloBlo + AloBhi + AhiBlo;
15083 
15084  SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);
15085  SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);
15086 
15087  // Bit cast to 32-bit vectors for MULUDQ
15088  EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 :
15089  (VT == MVT::v4i64) ? MVT::v8i32 : MVT::v16i32;
15090  A = DAG.getNode(ISD::BITCAST, dl, MulVT, A);
15091  B = DAG.getNode(ISD::BITCAST, dl, MulVT, B);
15092  Ahi = DAG.getNode(ISD::BITCAST, dl, MulVT, Ahi);
15093  Bhi = DAG.getNode(ISD::BITCAST, dl, MulVT, Bhi);
15094 
15095  SDValue AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B);
15096  SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
15097  SDValue AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
15098 
15099  AloBhi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AloBhi, 32, DAG);
15100  AhiBlo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AhiBlo, 32, DAG);
15101 
15102  SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
15103  return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
15104 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
bool hasSSE41() const
Definition: X86Subtarget.h:315
static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG)
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
bool hasSSE2() const
Definition: X86Subtarget.h:312
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
Definition: test.h:1
bool hasInt256() const
Definition: X86Subtarget.h:321
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT, SDValue SrcOp, uint64_t ShiftAmt, SelectionDAG &DAG)
static SDValue LowerMUL_LOHI ( SDValue  Op,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Definition at line 15158 of file X86ISelLowering.cpp.

15159  {
15160  SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
15161  EVT VT = Op0.getValueType();
15162  SDLoc dl(Op);
15163 
15164  assert((VT == MVT::v4i32 && Subtarget->hasSSE2()) ||
15165  (VT == MVT::v8i32 && Subtarget->hasInt256()));
15166 
15167  // PMULxD operations multiply each even value (starting at 0) of LHS with
15168  // the related value of RHS and produce a widen result.
15169  // E.g., PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
15170  // => <2 x i64> <ae|cg>
15171  //
15172  // In other word, to have all the results, we need to perform two PMULxD:
15173  // 1. one with the even values.
15174  // 2. one with the odd values.
15175  // To achieve #2, with need to place the odd values at an even position.
15176  //
15177  // Place the odd value at an even position (basically, shift all values 1
15178  // step to the left):
15179  const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1};
15180  // <a|b|c|d> => <b|undef|d|undef>
15181  SDValue Odd0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, Mask);
15182  // <e|f|g|h> => <f|undef|h|undef>
15183  SDValue Odd1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, Mask);
15184 
15185  // Emit two multiplies, one for the lower 2 ints and one for the higher 2
15186  // ints.
15187  MVT MulVT = VT == MVT::v4i32 ? MVT::v2i64 : MVT::v4i64;
15188  bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI;
15189  unsigned Opcode =
15190  (!IsSigned || !Subtarget->hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
15191  // PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
15192  // => <2 x i64> <ae|cg>
15193  SDValue Mul1 = DAG.getNode(ISD::BITCAST, dl, VT,
15194  DAG.getNode(Opcode, dl, MulVT, Op0, Op1));
15195  // PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef>
15196  // => <2 x i64> <bf|dh>
15197  SDValue Mul2 = DAG.getNode(ISD::BITCAST, dl, VT,
15198  DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1));
15199 
15200  // Shuffle it back into the right order.
15201  // The internal representation is big endian.
15202  // In other words, a i64 bitcasted to 2 x i32 has its high part at index 0
15203  // and its low part at index 1.
15204  // Moreover, we have: Mul1 = <ae|cg> ; Mul2 = <bf|dh>
15205  // Vector index 0 1 ; 2 3
15206  // We want <ae|bf|cg|dh>
15207  // Vector index 0 2 1 3
15208  // Since each element is seen as 2 x i32, we get:
15209  // high_mask[i] = 2 x vector_index[i]
15210  // low_mask[i] = 2 x vector_index[i] + 1
15211  // where vector_index = {0, Size/2, 1, Size/2 + 1, ...,
15212  // Size/2 - 1, Size/2 + Size/2 - 1}
15213  // where Size is the number of element of the final vector.
15214  SDValue Highs, Lows;
15215  if (VT == MVT::v8i32) {
15216  const int HighMask[] = {0, 8, 2, 10, 4, 12, 6, 14};
15217  Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
15218  const int LowMask[] = {1, 9, 3, 11, 5, 13, 7, 15};
15219  Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
15220  } else {
15221  const int HighMask[] = {0, 4, 2, 6};
15222  Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
15223  const int LowMask[] = {1, 5, 3, 7};
15224  Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
15225  }
15226 
15227  // If we have a signed multiply but no PMULDQ fix up the high parts of a
15228  // unsigned multiply.
15229  if (IsSigned && !Subtarget->hasSSE41()) {
15230  SDValue ShAmt =
15232  SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
15233  DAG.getNode(ISD::SRA, dl, VT, Op0, ShAmt), Op1);
15234  SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
15235  DAG.getNode(ISD::SRA, dl, VT, Op1, ShAmt), Op0);
15236 
15237  SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
15238  Highs = DAG.getNode(ISD::SUB, dl, VT, Highs, Fixup);
15239  }
15240 
15241  // The low part of a MUL_LOHI is supposed to be the first value and the
15242  // high part the second value.
15243  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getValueType(), Lows, Highs);
15244 }
unsigned getOpcode() const
bool hasSSE41() const
Definition: X86Subtarget.h:315
EVT getShiftAmountTy(EVT LHSTy) const
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
bool hasSSE2() const
Definition: X86Subtarget.h:312
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
bool hasInt256() const
Definition: X86Subtarget.h:321
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
EVT getValueType() const
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
#define T1
static SDValue LowerREADCYCLECOUNTER ( SDValue  Op,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Definition at line 14376 of file X86ISelLowering.cpp.

14377  {
14378  SmallVector<SDValue, 2> Results;
14379  SDLoc DL(Op);
14380  getReadTimeStampCounter(Op.getNode(), DL, X86ISD::RDTSC_DAG, DAG, Subtarget,
14381  Results);
14382  return DAG.getMergeValues(Results, DL);
14383 }
static void getReadTimeStampCounter(SDNode *N, SDLoc DL, unsigned Opcode, SelectionDAG &DAG, const X86Subtarget *Subtarget, SmallVectorImpl< SDValue > &Results)
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
SDNode * getNode() const
get the SDNode which holds the desired result
static SDValue LowerSCALAR_TO_VECTOR ( SDValue  Op,
SelectionDAG DAG 
)
static

Definition at line 10056 of file X86ISelLowering.cpp.

10056  {
10057  SDLoc dl(Op);
10058  MVT OpVT = Op.getSimpleValueType();
10059 
10060  // If this is a 256-bit vector result, first insert into a 128-bit
10061  // vector and then insert into the 256-bit vector.
10062  if (!OpVT.is128BitVector()) {
10063  // Insert into a 128-bit vector.
10064  unsigned SizeFactor = OpVT.getSizeInBits()/128;
10065  MVT VT128 = MVT::getVectorVT(OpVT.getVectorElementType(),
10066  OpVT.getVectorNumElements() / SizeFactor);
10067 
10068  Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));
10069 
10070  // Insert the 128-bit vector.
10071  return Insert128BitVector(DAG.getUNDEF(OpVT), Op, 0, DAG, dl);
10072  }
10073 
10074  if (OpVT == MVT::v1i64 &&
10075  Op.getOperand(0).getValueType() == MVT::i64)
10076  return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
10077 
10078  SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
10079  assert(OpVT.is128BitVector() && "Expected an SSE type!");
10080  return DAG.getNode(ISD::BITCAST, dl, OpVT,
10081  DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt));
10082 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getSizeInBits() const
static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:365
MVT getVectorElementType() const
static SDValue LowerScalarImmediateShift ( SDValue  Op,
SelectionDAG DAG,
const X86Subtarget Subtarget 
)
static

Definition at line 15246 of file X86ISelLowering.cpp.

15247  {
15248  MVT VT = Op.getSimpleValueType();
15249  SDLoc dl(Op);
15250  SDValue R = Op.getOperand(0);
15251  SDValue Amt = Op.getOperand(1);
15252 
15253  // Optimize shl/srl/sra with constant shift amount.
15254  if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
15255  if (auto *ShiftConst = BVAmt->getConstantSplatNode()) {
15256  uint64_t ShiftAmt = ShiftConst->getZExtValue();
15257 
15258  if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
15259  (Subtarget->hasInt256() &&
15260  (VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16)) ||
15261  (Subtarget->hasAVX512() &&
15262  (VT == MVT::v8i64 || VT == MVT::v16i32))) {
15263  if (Op.getOpcode() == ISD::SHL)
15264  return getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, R, ShiftAmt,
15265  DAG);
15266  if (Op.getOpcode() == ISD::SRL)
15267  return getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt,
15268  DAG);
15269  if (Op.getOpcode() == ISD::SRA && VT != MVT::v2i64 && VT != MVT::v4i64)
15270  return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, ShiftAmt,
15271  DAG);
15272  }
15273 
15274  if (VT == MVT::v16i8) {
15275  if (Op.getOpcode() == ISD::SHL) {
15276  // Make a large shift.
15278  MVT::v8i16, R, ShiftAmt,
15279  DAG);
15280  SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
15281  // Zero out the rightmost bits.
15283  DAG.getConstant(uint8_t(-1U << ShiftAmt),
15284  MVT::i8));
15285  return DAG.getNode(ISD::AND, dl, VT, SHL,
15286  DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
15287  }
15288  if (Op.getOpcode() == ISD::SRL) {
15289  // Make a large shift.
15291  MVT::v8i16, R, ShiftAmt,
15292  DAG);
15293  SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
15294  // Zero out the leftmost bits.
15296  DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
15297  MVT::i8));
15298  return DAG.getNode(ISD::AND, dl, VT, SRL,
15299  DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
15300  }
15301  if (Op.getOpcode() == ISD::SRA) {
15302  if (ShiftAmt == 7) {
15303  // R s>> 7 === R s< 0
15304  SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
15305  return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
15306  }
15307 
15308  // R s>> a === ((R u>> a) ^ m) - m
15309  SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
15310  SmallVector<SDValue, 16> V(16, DAG.getConstant(128 >> ShiftAmt,
15311  MVT::i8));
15312  SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V);
15313  Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
15314  Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
15315  return Res;
15316  }
15317  llvm_unreachable("Unknown shift opcode.");
15318  }
15319 
15320  if (Subtarget->hasInt256() && VT == MVT::v32i8) {
15321  if (Op.getOpcode() == ISD::SHL) {
15322  // Make a large shift.
15324  MVT::v16i16, R, ShiftAmt,
15325  DAG);
15326  SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
15327  // Zero out the rightmost bits.
15329  DAG.getConstant(uint8_t(-1U << ShiftAmt),
15330  MVT::i8));
15331  return DAG.getNode(ISD::AND, dl, VT, SHL,
15332  DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
15333  }
15334  if (Op.getOpcode() == ISD::SRL) {
15335  // Make a large shift.
15337  MVT::v16i16, R, ShiftAmt,
15338  DAG);
15339  SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
15340  // Zero out the leftmost bits.
15342  DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
15343  MVT::i8));
15344  return DAG.getNode(ISD::AND, dl, VT, SRL,
15345  DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
15346  }
15347  if (Op.getOpcode() == ISD::SRA) {
15348  if (ShiftAmt == 7) {
15349  // R s>> 7 === R s< 0
15350  SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
15351  return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
15352  }
15353 
15354  // R s>> a === ((R u>> a) ^ m) - m
15355  SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
15356  SmallVector<SDValue, 32> V(32, DAG.getConstant(128 >> ShiftAmt,
15357  MVT::i8));
15358  SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V);
15359  Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
15360  Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
15361  return Res;
15362  }
15363  llvm_unreachable("Unknown shift opcode.");
15364  }
15365  }
15366  }
15367 
15368  // Special case in 32-bit mode, where i64 is expanded into high and low parts.
15369  if (!Subtarget->is64Bit() &&
15370  (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
15371  Amt.getOpcode() == ISD::BITCAST &&
15372  Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
15373  Amt = Amt.getOperand(0);
15374  unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
15375  VT.getVectorNumElements();
15376  unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
15377  uint64_t ShiftAmt = 0;
15378  for (unsigned i = 0; i != Ratio; ++i) {
15380  if (!C)
15381  return SDValue();
15382  // 6 == Log2(64)
15383  ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
15384  }
15385  // Check remaining shift amounts.
15386  for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
15387  uint64_t ShAmt = 0;
15388  for (unsigned j = 0; j != Ratio; ++j) {
15389  ConstantSDNode *C =
15390  dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
15391  if (!C)
15392  return SDValue();
15393  // 6 == Log2(64)
15394  ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
15395  }
15396  if (ShAmt != ShiftAmt)
15397  return SDValue();
15398  }
15399  switch (Op.getOpcode()) {
15400  default:
15401  llvm_unreachable("Unknown shift opcode!");
15402  case ISD::SHL:
15403  return getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, R, ShiftAmt,
15404  DAG);
15405  case ISD::SRL:
15406  return getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt,
15407  DAG);
15408  case ISD::SRA:
15409  return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, ShiftAmt,
15410  DAG);
15411  }
15412  }
15413 
15414  return SDValue();
15415 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned Log2_32_Ceil(uint32_t Value)
Definition: MathExtras.h:465
static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG, SDLoc dl)
unsigned getNumOperands() const
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Definition: X86Subtarget.h:283
unsigned getVectorNumElements() const
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
bool hasInt256() const
Definition: X86Subtarget.h:321
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:265
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
bool hasAVX512() const
Definition: X86Subtarget.h:319
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT, SDValue SrcOp, uint64_t ShiftAmt, SelectionDAG &DAG)
uint64_t getZExtValue() const
static SDValue LowerScalarVariableShift ( SDValue  Op,
SelectionDAG DAG,
const X86Subtarget Subtarget 
)
static

Definition at line 15417 of file X86ISelLowering.cpp.

15418  {
15419  MVT VT = Op.getSimpleValueType();
15420  SDLoc dl(Op);
15421  SDValue R = Op.getOperand(0);
15422  SDValue Amt = Op.getOperand(1);
15423 
15424  if ((VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) ||
15425  VT == MVT::v4i32 || VT == MVT::v8i16 ||
15426  (Subtarget->hasInt256() &&
15427  ((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) ||
15428  VT == MVT::v8i32 || VT == MVT::v16i16)) ||
15429  (Subtarget->hasAVX512() && (VT == MVT::v8i64 || VT == MVT::v16i32))) {
15430  SDValue BaseShAmt;
15431  EVT EltVT = VT.getVectorElementType();
15432 
15433  if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
15434  unsigned NumElts = VT.getVectorNumElements();
15435  unsigned i, j;
15436  for (i = 0; i != NumElts; ++i) {
15437  if (Amt.getOperand(i).getOpcode() == ISD::UNDEF)
15438  continue;
15439  break;
15440  }
15441  for (j = i; j != NumElts; ++j) {
15442  SDValue Arg = Amt.getOperand(j);
15443  if (Arg.getOpcode() == ISD::UNDEF) continue;
15444  if (Arg != Amt.getOperand(i))
15445  break;
15446  }
15447  if (i != NumElts && j == NumElts)
15448  BaseShAmt = Amt.getOperand(i);
15449  } else {
15450  if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR)
15451  Amt = Amt.getOperand(0);
15452  if (Amt.getOpcode() == ISD::VECTOR_SHUFFLE &&
15453  cast<ShuffleVectorSDNode>(Amt)->isSplat()) {
15454  SDValue InVec = Amt.getOperand(0);
15455  if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
15456  unsigned NumElts = InVec.getValueType().getVectorNumElements();
15457  unsigned i = 0;
15458  for (; i != NumElts; ++i) {
15459  SDValue Arg = InVec.getOperand(i);
15460  if (Arg.getOpcode() == ISD::UNDEF) continue;
15461  BaseShAmt = Arg;
15462  break;
15463  }
15464  } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
15465  if (ConstantSDNode *C =
15466  dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
15467  unsigned SplatIdx =
15468  cast<ShuffleVectorSDNode>(Amt)->getSplatIndex();
15469  if (C->getZExtValue() == SplatIdx)
15470  BaseShAmt = InVec.getOperand(1);
15471  }
15472  }
15473  if (!BaseShAmt.getNode())
15474  BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Amt,
15475  DAG.getIntPtrConstant(0));
15476  }
15477  }
15478 
15479  if (BaseShAmt.getNode()) {
15480  if (EltVT.bitsGT(MVT::i32))
15481  BaseShAmt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BaseShAmt);
15482  else if (EltVT.bitsLT(MVT::i32))
15483  BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
15484 
15485  switch (Op.getOpcode()) {
15486  default:
15487  llvm_unreachable("Unknown shift opcode!");
15488  case ISD::SHL:
15489  switch (VT.SimpleTy) {
15490  default: return SDValue();
15491  case MVT::v2i64:
15492  case MVT::v4i32:
15493  case MVT::v8i16:
15494  case MVT::v4i64:
15495  case MVT::v8i32:
15496  case MVT::v16i16:
15497  case MVT::v16i32:
15498  case MVT::v8i64:
15499  return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG);
15500  }
15501  case ISD::SRA:
15502  switch (VT.SimpleTy) {
15503  default: return SDValue();
15504  case MVT::v4i32:
15505  case MVT::v8i16:
15506  case MVT::v8i32:
15507  case MVT::v16i16:
15508  case MVT::v16i32:
15509  case MVT::v8i64:
15510  return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG);
15511  }
15512  case ISD::SRL:
15513  switch (VT.SimpleTy) {
15514  default: return SDValue();
15515  case MVT::v2i64:
15516  case MVT::v4i32:
15517  case MVT::v8i16:
15518  case MVT::v4i64:
15519  case MVT::v8i32:
15520  case MVT::v16i16:
15521  case MVT::v16i32:
15522  case MVT::v8i64:
15523  return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG);
15524  }
15525  }
15526  }
15527  }
15528 
15529  // Special case in 32-bit mode, where i64 is expanded into high and low parts.
15530  if (!Subtarget->is64Bit() &&
15531  (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64) ||
15532  (Subtarget->hasAVX512() && VT == MVT::v8i64)) &&
15533  Amt.getOpcode() == ISD::BITCAST &&
15534  Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
15535  Amt = Amt.getOperand(0);
15536  unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
15537  VT.getVectorNumElements();
15538  std::vector<SDValue> Vals(Ratio);
15539  for (unsigned i = 0; i != Ratio; ++i)
15540  Vals[i] = Amt.getOperand(i);
15541  for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
15542  for (unsigned j = 0; j != Ratio; ++j)
15543  if (Vals[j] != Amt.getOperand(i + j))
15544  return SDValue();
15545  }
15546  switch (Op.getOpcode()) {
15547  default:
15548  llvm_unreachable("Unknown shift opcode!");
15549  case ISD::SHL:
15550  return DAG.getNode(X86ISD::VSHL, dl, VT, R, Op.getOperand(1));
15551  case ISD::SRL:
15552  return DAG.getNode(X86ISD::VSRL, dl, VT, R, Op.getOperand(1));
15553  case ISD::SRA:
15554  return DAG.getNode(X86ISD::VSRA, dl, VT, R, Op.getOperand(1));
15555  }
15556  }
15557 
15558  return SDValue();
15559 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getNumOperands() const
bool bitsLT(EVT VT) const
bitsLT - Return true if this has less bits than VT.
Definition: ValueTypes.h:190
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
SimpleValueType SimpleTy
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Definition: X86Subtarget.h:283
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
SDNode * getNode() const
get the SDNode which holds the desired result
unsigned getVectorNumElements() const
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, SDValue SrcOp, SDValue ShAmt, SelectionDAG &DAG)
bool bitsGT(EVT VT) const
bitsGT - Return true if this has more bits than VT.
Definition: ValueTypes.h:178
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
bool hasInt256() const
Definition: X86Subtarget.h:321
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:362
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
bool hasAVX512() const
Definition: X86Subtarget.h:319
EVT getValueType() const
MVT getVectorElementType() const
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:368
unsigned getVectorNumElements() const
Definition: ValueTypes.h:226
static SDValue LowerShift ( SDValue  Op,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Definition at line 15561 of file X86ISelLowering.cpp.

15562  {
15563  MVT VT = Op.getSimpleValueType();
15564  SDLoc dl(Op);
15565  SDValue R = Op.getOperand(0);
15566  SDValue Amt = Op.getOperand(1);
15567  SDValue V;
15568 
15569  assert(VT.isVector() && "Custom lowering only for vector shifts!");
15570  assert(Subtarget->hasSSE2() && "Only custom lower when we have SSE2!");
15571 
15572  V = LowerScalarImmediateShift(Op, DAG, Subtarget);
15573  if (V.getNode())
15574  return V;
15575 
15576  V = LowerScalarVariableShift(Op, DAG, Subtarget);
15577  if (V.getNode())
15578  return V;
15579 
15580  if (Subtarget->hasAVX512() && (VT == MVT::v16i32 || VT == MVT::v8i64))
15581  return Op;
15582  // AVX2 has VPSLLV/VPSRAV/VPSRLV.
15583  if (Subtarget->hasInt256()) {
15584  if (Op.getOpcode() == ISD::SRL &&
15585  (VT == MVT::v2i64 || VT == MVT::v4i32 ||
15586  VT == MVT::v4i64 || VT == MVT::v8i32))
15587  return Op;
15588  if (Op.getOpcode() == ISD::SHL &&
15589  (VT == MVT::v2i64 || VT == MVT::v4i32 ||
15590  VT == MVT::v4i64 || VT == MVT::v8i32))
15591  return Op;
15592  if (Op.getOpcode() == ISD::SRA && (VT == MVT::v4i32 || VT == MVT::v8i32))
15593  return Op;
15594  }
15595 
15596  // If possible, lower this packed shift into a vector multiply instead of
15597  // expanding it into a sequence of scalar shifts.
15598  // Do this only if the vector shift count is a constant build_vector.
15599  if (Op.getOpcode() == ISD::SHL &&
15600  (VT == MVT::v8i16 || VT == MVT::v4i32 ||
 15601  (Subtarget->hasInt256() && VT == MVT::v16i16)) &&
 15602  ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
 15603  SmallVector<SDValue, 8> Elts;
 15604  EVT SVT = VT.getScalarType();
15605  unsigned SVTBits = SVT.getSizeInBits();
15606  const APInt &One = APInt(SVTBits, 1);
15607  unsigned NumElems = VT.getVectorNumElements();
15608 
15609  for (unsigned i=0; i !=NumElems; ++i) {
15610  SDValue Op = Amt->getOperand(i);
15611  if (Op->getOpcode() == ISD::UNDEF) {
15612  Elts.push_back(Op);
15613  continue;
15614  }
15615 
15616  ConstantSDNode *ND = cast<ConstantSDNode>(Op);
15617  const APInt &C = APInt(SVTBits, ND->getAPIntValue().getZExtValue());
15618  uint64_t ShAmt = C.getZExtValue();
15619  if (ShAmt >= SVTBits) {
15620  Elts.push_back(DAG.getUNDEF(SVT));
15621  continue;
15622  }
15623  Elts.push_back(DAG.getConstant(One.shl(ShAmt), SVT));
15624  }
15625  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Elts);
15626  return DAG.getNode(ISD::MUL, dl, VT, R, BV);
15627  }
15628 
15629  // Lower SHL with variable shift amount.
15630  if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
15631  Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT));
15632 
15633  Op = DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getConstant(0x3f800000U, VT));
15634  Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
15635  Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
15636  return DAG.getNode(ISD::MUL, dl, VT, Op, R);
15637  }
15638 
15639  // If possible, lower this shift as a sequence of two shifts by
15640  // constant plus a MOVSS/MOVSD instead of scalarizing it.
15641  // Example:
15642  // (v4i32 (srl A, (build_vector < X, Y, Y, Y>)))
15643  //
15644  // Could be rewritten as:
15645  // (v4i32 (MOVSS (srl A, <Y,Y,Y,Y>), (srl A, <X,X,X,X>)))
15646  //
15647  // The advantage is that the two shifts from the example would be
15648  // lowered as X86ISD::VSRLI nodes. This would be cheaper than scalarizing
15649  // the vector shift into four scalar shifts plus four pairs of vector
15650  // insert/extract.
 15651  if ((VT == MVT::v8i16 || VT == MVT::v4i32) &&
 15652  ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
 15653  unsigned TargetOpcode = X86ISD::MOVSS;
15654  bool CanBeSimplified;
15655  // The splat value for the first packed shift (the 'X' from the example).
15656  SDValue Amt1 = Amt->getOperand(0);
15657  // The splat value for the second packed shift (the 'Y' from the example).
15658  SDValue Amt2 = (VT == MVT::v4i32) ? Amt->getOperand(1) :
15659  Amt->getOperand(2);
15660 
15661  // See if it is possible to replace this node with a sequence of
15662  // two shifts followed by a MOVSS/MOVSD
15663  if (VT == MVT::v4i32) {
15664  // Check if it is legal to use a MOVSS.
15665  CanBeSimplified = Amt2 == Amt->getOperand(2) &&
15666  Amt2 == Amt->getOperand(3);
15667  if (!CanBeSimplified) {
15668  // Otherwise, check if we can still simplify this node using a MOVSD.
15669  CanBeSimplified = Amt1 == Amt->getOperand(1) &&
15670  Amt->getOperand(2) == Amt->getOperand(3);
15671  TargetOpcode = X86ISD::MOVSD;
15672  Amt2 = Amt->getOperand(2);
15673  }
15674  } else {
15675  // Do similar checks for the case where the machine value type
15676  // is MVT::v8i16.
15677  CanBeSimplified = Amt1 == Amt->getOperand(1);
15678  for (unsigned i=3; i != 8 && CanBeSimplified; ++i)
15679  CanBeSimplified = Amt2 == Amt->getOperand(i);
15680 
15681  if (!CanBeSimplified) {
15682  TargetOpcode = X86ISD::MOVSD;
15683  CanBeSimplified = true;
15684  Amt2 = Amt->getOperand(4);
15685  for (unsigned i=0; i != 4 && CanBeSimplified; ++i)
15686  CanBeSimplified = Amt1 == Amt->getOperand(i);
15687  for (unsigned j=4; j != 8 && CanBeSimplified; ++j)
15688  CanBeSimplified = Amt2 == Amt->getOperand(j);
15689  }
15690  }
15691 
15692  if (CanBeSimplified && isa<ConstantSDNode>(Amt1) &&
15693  isa<ConstantSDNode>(Amt2)) {
15694  // Replace this node with two shifts followed by a MOVSS/MOVSD.
15695  EVT CastVT = MVT::v4i32;
15696  SDValue Splat1 =
15697  DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), VT);
15698  SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
15699  SDValue Splat2 =
15700  DAG.getConstant(cast<ConstantSDNode>(Amt2)->getAPIntValue(), VT);
15701  SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2);
15702  if (TargetOpcode == X86ISD::MOVSD)
15703  CastVT = MVT::v2i64;
15704  SDValue BitCast1 = DAG.getNode(ISD::BITCAST, dl, CastVT, Shift1);
15705  SDValue BitCast2 = DAG.getNode(ISD::BITCAST, dl, CastVT, Shift2);
15706  SDValue Result = getTargetShuffleNode(TargetOpcode, dl, CastVT, BitCast2,
15707  BitCast1, DAG);
15708  return DAG.getNode(ISD::BITCAST, dl, VT, Result);
15709  }
15710  }
15711 
15712  if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
15713  assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
15714 
15715  // a = a << 5;
15716  Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(5, VT));
15717  Op = DAG.getNode(ISD::BITCAST, dl, VT, Op);
15718 
15719  // Turn 'a' into a mask suitable for VSELECT
15720  SDValue VSelM = DAG.getConstant(0x80, VT);
15721  SDValue OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
15722  OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
15723 
15724  SDValue CM1 = DAG.getConstant(0x0f, VT);
15725  SDValue CM2 = DAG.getConstant(0x3f, VT);
15726 
15727  // r = VSELECT(r, psllw(r & (char16)15, 4), a);
15728  SDValue M = DAG.getNode(ISD::AND, dl, VT, R, CM1);
15729  M = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, MVT::v8i16, M, 4, DAG);
15730  M = DAG.getNode(ISD::BITCAST, dl, VT, M);
15731  R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
15732 
15733  // a += a
15734  Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
15735  OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
15736  OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
15737 
15738  // r = VSELECT(r, psllw(r & (char16)63, 2), a);
15739  M = DAG.getNode(ISD::AND, dl, VT, R, CM2);
15740  M = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, MVT::v8i16, M, 2, DAG);
15741  M = DAG.getNode(ISD::BITCAST, dl, VT, M);
15742  R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
15743 
15744  // a += a
15745  Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
15746  OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
15747  OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
15748 
15749  // return VSELECT(r, r+r, a);
15750  R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel,
15751  DAG.getNode(ISD::ADD, dl, VT, R, R), R);
15752  return R;
15753  }
15754 
15755  // It's worth extending once and using the v8i32 shifts for 16-bit types, but
15756  // the extra overheads to get from v16i8 to v8i32 make the existing SSE
15757  // solution better.
15758  if (Subtarget->hasInt256() && VT == MVT::v8i16) {
15759  MVT NewVT = VT == MVT::v8i16 ? MVT::v8i32 : MVT::v16i16;
15760  unsigned ExtOpc =
15761  Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
15762  R = DAG.getNode(ExtOpc, dl, NewVT, R);
15763  Amt = DAG.getNode(ISD::ANY_EXTEND, dl, NewVT, Amt);
15764  return DAG.getNode(ISD::TRUNCATE, dl, VT,
15765  DAG.getNode(Op.getOpcode(), dl, NewVT, R, Amt));
15766  }
15767 
15768  // Decompose 256-bit shifts into smaller 128-bit shifts.
15769  if (VT.is256BitVector()) {
15770  unsigned NumElems = VT.getVectorNumElements();
15771  MVT EltVT = VT.getVectorElementType();
15772  EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
15773 
15774  // Extract the two vectors
15775  SDValue V1 = Extract128BitVector(R, 0, DAG, dl);
15776  SDValue V2 = Extract128BitVector(R, NumElems/2, DAG, dl);
15777 
15778  // Recreate the shift amount vectors
15779  SDValue Amt1, Amt2;
15780  if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
15781  // Constant shift amount
15782  SmallVector<SDValue, 4> Amt1Csts;
15783  SmallVector<SDValue, 4> Amt2Csts;
15784  for (unsigned i = 0; i != NumElems/2; ++i)
15785  Amt1Csts.push_back(Amt->getOperand(i));
15786  for (unsigned i = NumElems/2; i != NumElems; ++i)
15787  Amt2Csts.push_back(Amt->getOperand(i));
15788 
15789  Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Amt1Csts);
15790  Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Amt2Csts);
15791  } else {
15792  // Variable shift amount
15793  Amt1 = Extract128BitVector(Amt, 0, DAG, dl);
15794  Amt2 = Extract128BitVector(Amt, NumElems/2, DAG, dl);
15795  }
15796 
15797  // Issue new vector shifts for the smaller types
15798  V1 = DAG.getNode(Op.getOpcode(), dl, NewVT, V1, Amt1);
15799  V2 = DAG.getNode(Op.getOpcode(), dl, NewVT, V2, Amt2);
15800 
15801  // Concatenate the result back
15802  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, V1, V2);
15803  }
15804 
15805  return SDValue();
15806 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef...
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1302
static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget)
unsigned getOpcode() const
const SDValue & getOperand(unsigned Num) const
static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, SDValue V1, SelectionDAG &DAG)
EVT getScalarType() const
Definition: ValueTypes.h:211
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
const APInt & getAPIntValue() const
APInt LLVM_ATTRIBUTE_UNUSED_RESULT shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:852
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
bool hasSSE2() const
Definition: X86Subtarget.h:312
SDNode * getNode() const
get the SDNode which holds the desired result
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
bool isVector() const
isVector - Return true if this is a vector value type.
unsigned getOpcode() const
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
Class for arbitrary precision integers.
Definition: APInt.h:75
bool hasInt256() const
Definition: X86Subtarget.h:321
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:362
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:365
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
bool hasAVX512() const
Definition: X86Subtarget.h:319
static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT, SDValue SrcOp, uint64_t ShiftAmt, SelectionDAG &DAG)
MVT getVectorElementType() const
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:368
static SDValue LowerShiftParts ( SDValue  Op,
SelectionDAG DAG 
)
static

LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values and take a 2 x i32 value to shift plus a shift amount.

Definition at line 10625 of file X86ISelLowering.cpp.

10625  {
10626  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
10627  MVT VT = Op.getSimpleValueType();
10628  unsigned VTBits = VT.getSizeInBits();
10629  SDLoc dl(Op);
10630  bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
10631  SDValue ShOpLo = Op.getOperand(0);
10632  SDValue ShOpHi = Op.getOperand(1);
10633  SDValue ShAmt = Op.getOperand(2);
10634  // X86ISD::SHLD and X86ISD::SHRD have defined overflow behavior but the
10635  // generic ISD nodes haven't. Insert an AND to be safe, it's optimized away
10636  // during isel.
10637  SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
10638  DAG.getConstant(VTBits - 1, MVT::i8));
10639  SDValue Tmp1 = isSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
10640  DAG.getConstant(VTBits - 1, MVT::i8))
10641  : DAG.getConstant(0, VT);
10642 
10643  SDValue Tmp2, Tmp3;
10644  if (Op.getOpcode() == ISD::SHL_PARTS) {
10645  Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt);
10646  Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
10647  } else {
10648  Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt);
10649  Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
10650  }
10651 
10652  // If the shift amount is larger or equal than the width of a part we can't
10653  // rely on the results of shld/shrd. Insert a test and select the appropriate
10654  // values for large shift amounts.
10655  SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
10656  DAG.getConstant(VTBits, MVT::i8));
10657  SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
10658  AndNode, DAG.getConstant(0, MVT::i8));
10659 
10660  SDValue Hi, Lo;
10661  SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8);
10662  SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond };
10663  SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond };
10664 
10665  if (Op.getOpcode() == ISD::SHL_PARTS) {
10666  Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0);
10667  Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1);
10668  } else {
10669  Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0);
10670  Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1);
10671  }
10672 
10673  SDValue Ops[2] = { Lo, Hi };
10674  return DAG.getMergeValues(Ops, dl);
10675 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
SDValue getMergeValues(ArrayRef< SDValue > Ops, SDLoc dl)
getMergeValues - Create a MERGE_VALUES node from the given operands.
unsigned getSizeInBits() const
unsigned getNumOperands() const
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static SDValue LowerSIGN_EXTEND ( SDValue  Op,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Definition at line 12763 of file X86ISelLowering.cpp.

12764  {
12765  MVT VT = Op->getSimpleValueType(0);
12766  SDValue In = Op->getOperand(0);
12767  MVT InVT = In.getSimpleValueType();
12768  SDLoc dl(Op);
12769 
12770  if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1)
12771  return LowerSIGN_EXTEND_AVX512(Op, DAG);
12772 
12773  if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
12774  (VT != MVT::v8i32 || InVT != MVT::v8i16) &&
12775  (VT != MVT::v16i16 || InVT != MVT::v16i8))
12776  return SDValue();
12777 
12778  if (Subtarget->hasInt256())
12779  return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
12780 
12781  // Optimize vectors in AVX mode
12782  // Sign extend v8i16 to v8i32 and
12783  // v4i32 to v4i64
12784  //
12785  // Divide input vector into two parts
12786  // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1}
12787  // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32
12788  // concat the vectors to original VT
12789 
12790  unsigned NumElems = InVT.getVectorNumElements();
12791  SDValue Undef = DAG.getUNDEF(InVT);
12792 
12793  SmallVector<int,8> ShufMask1(NumElems, -1);
12794  for (unsigned i = 0; i != NumElems/2; ++i)
12795  ShufMask1[i] = i;
12796 
12797  SDValue OpLo = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask1[0]);
12798 
12799  SmallVector<int,8> ShufMask2(NumElems, -1);
12800  for (unsigned i = 0; i != NumElems/2; ++i)
12801  ShufMask2[i] = i + NumElems/2;
12802 
12803  SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]);
12804 
12805  MVT HalfVT = MVT::getVectorVT(VT.getScalarType(),
12806  VT.getVectorNumElements()/2);
12807 
12808  OpLo = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpLo);
12809  OpHi = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpHi);
12810 
12811  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
12812 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
const SDValue & getOperand(unsigned Num) const
MVT getScalarType() const
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
unsigned getVectorNumElements() const
***NAME is the name of the raw_ostream unsigned & i1
bool hasInt256() const
Definition: X86Subtarget.h:321
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG)
MVT getVectorElementType() const
MVT getSimpleValueType(unsigned ResNo) const
static SDValue LowerSIGN_EXTEND_AVX512 ( SDValue  Op,
SelectionDAG DAG 
)
static

Definition at line 12732 of file X86ISelLowering.cpp.

12732  {
12733  MVT VT = Op->getSimpleValueType(0);
12734  SDValue In = Op->getOperand(0);
12735  MVT InVT = In.getSimpleValueType();
12736  SDLoc dl(Op);
12737 
12738  unsigned int NumElts = VT.getVectorNumElements();
12739  if (NumElts != 8 && NumElts != 16)
12740  return SDValue();
12741 
12742  if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
12743  return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
12744 
12745  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12746  assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
12747 
12748  MVT ExtVT = (NumElts == 8) ? MVT::v8i64 : MVT::v16i32;
12749  Constant *C = ConstantInt::get(*DAG.getContext(),
12750  APInt::getAllOnesValue(ExtVT.getScalarType().getSizeInBits()));
12751 
12752  SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
12753  unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
12754  SDValue Ld = DAG.getLoad(ExtVT.getScalarType(), dl, DAG.getEntryNode(), CP,
12755  MachinePointerInfo::getConstantPool(),
12756  false, false, false, Alignment);
12757  SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, dl, ExtVT, In, Ld);
12758  if (VT.is512BitVector())
12759  return Brcst;
12760  return DAG.getNode(X86ISD::VTRUNC, dl, VT, Brcst);
12761 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
LLVMContext * getContext() const
Definition: SelectionDAG.h:280
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
const SDValue & getOperand(unsigned Num) const
SDValue getConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offs=0, bool isT=false, unsigned char TargetFlags=0)
virtual MVT getPointerTy(uint32_t=0) const
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
LLVM Constant Representation.
Definition: Constant.h:41
***NAME is the name of the raw_ostream unsigned & i1
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo=nullptr, const MDNode *Ranges=nullptr)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
MVT getVectorElementType() const
SDValue getEntryNode() const
Definition: SelectionDAG.h:327
MVT getSimpleValueType(unsigned ResNo) const
static SDValue LowerSUB ( SDValue  Op,
SelectionDAG DAG 
)
static

Definition at line 15027 of file X86ISelLowering.cpp.

 15027  {
 15028  assert(Op.getSimpleValueType().is256BitVector() &&
 15029  Op.getSimpleValueType().isInteger() &&
15030  "Only handle AVX 256-bit vector integer operation");
15031  return Lower256IntArith(Op, DAG);
15032 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG)
assert(Globals.size() > 1)
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
static SDValue LowerToTLSExecModel ( GlobalAddressSDNode GA,
SelectionDAG DAG,
const EVT  PtrVT,
TLSModel::Model  model,
bool  is64Bit,
bool  isPIC 
)
static

Definition at line 10430 of file X86ISelLowering.cpp.

10432  {
10433  SDLoc dl(GA);
10434 
10435  // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
10436  Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
10437  is64Bit ? 257 : 256));
 10438 
 10439  SDValue ThreadPointer =
 10440  DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0),
10441  MachinePointerInfo(Ptr), false, false, false, 0);
10442 
10443  unsigned char OperandFlags = 0;
10444  // Most TLS accesses are not RIP relative, even on x86-64. One exception is
10445  // initialexec.
10446  unsigned WrapperKind = X86ISD::Wrapper;
10447  if (model == TLSModel::LocalExec) {
10448  OperandFlags = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF;
10449  } else if (model == TLSModel::InitialExec) {
10450  if (is64Bit) {
10451  OperandFlags = X86II::MO_GOTTPOFF;
10452  WrapperKind = X86ISD::WrapperRIP;
10453  } else {
10454  OperandFlags = isPIC ? X86II::MO_GOTNTPOFF : X86II::MO_INDNTPOFF;
10455  }
10456  } else {
10457  llvm_unreachable("Unexpected model");
10458  }
10459 
10460  // emit "addl x@ntpoff,%eax" (local exec)
10461  // or "addl x@indntpoff,%eax" (initial exec)
10462  // or "addl x@gotntpoff(%ebx) ,%eax" (initial exec, 32-bit pic)
10463  SDValue TGA =
10464  DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
10465  GA->getOffset(), OperandFlags);
10466  SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
10467 
10468  if (model == TLSModel::InitialExec) {
10469  if (isPIC && !is64Bit) {
10470  Offset = DAG.getNode(ISD::ADD, dl, PtrVT,
10471  DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
10472  Offset);
10473  }
10474 
10475  Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
10476  MachinePointerInfo::getGOT(), false, false, false, 0);
10477  }
10478 
10479  // The address of the thread local variable is the add of the thread
10480  // pointer with the offset of the variable.
10481  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
10482 }
LLVMContext * getContext() const
Definition: SelectionDAG.h:280
const GlobalValue * getGlobal() const
static bool is64Bit(const ELFYAML::Object &Doc)
Definition: yaml2elf.cpp:463
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
EVT getValueType(unsigned ResNo) const
SDValue getTargetGlobalAddress(const GlobalValue *GV, SDLoc DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:433
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo=nullptr, const MDNode *Ranges=nullptr)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
LLVM Value Representation.
Definition: Value.h:69
SDValue getEntryNode() const
Definition: SelectionDAG.h:327
static SDValue LowerToTLSGeneralDynamicModel32 ( GlobalAddressSDNode GA,
SelectionDAG DAG,
const EVT  PtrVT 
)
static

Definition at line 10370 of file X86ISelLowering.cpp.

10371  {
10372  SDValue InFlag;
10373  SDLoc dl(GA); // ? function entry point might be better
10374  SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
10376  SDLoc(), PtrVT), InFlag);
10377  InFlag = Chain.getValue(1);
10378 
10379  return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD);
10380 }
SDValue getValue(unsigned R) const
SDValue getCopyToReg(SDValue Chain, SDLoc dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:486
MO_TLSGD - Represents the offset into the global offset table at which the module TLS data resides.
Definition: MipsBaseInfo.h:59
static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg, unsigned char OperandFlags, bool LocalDynamic=false)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getEntryNode() const
Definition: SelectionDAG.h:327
static SDValue LowerToTLSGeneralDynamicModel64 ( GlobalAddressSDNode GA,
SelectionDAG DAG,
const EVT  PtrVT 
)
static

Definition at line 10384 of file X86ISelLowering.cpp.

10385  {
10386  return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
10387  X86::RAX, X86II::MO_TLSGD);
10388 }
MO_TLSGD - Represents the offset into the global offset table at which the module TLS data resides.
Definition: MipsBaseInfo.h:59
static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg, unsigned char OperandFlags, bool LocalDynamic=false)
SDValue getEntryNode() const
Definition: SelectionDAG.h:327
static SDValue LowerToTLSLocalDynamicModel ( GlobalAddressSDNode GA,
SelectionDAG DAG,
const EVT  PtrVT,
bool  is64Bit 
)
static

Definition at line 10390 of file X86ISelLowering.cpp.

10393  {
10394  SDLoc dl(GA);
10395 
10396  // Get the start address of the TLS block for this module.
10400 
10401  SDValue Base;
10402  if (is64Bit) {
10403  Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX,
10404  X86II::MO_TLSLD, /*LocalDynamic=*/true);
10405  } else {
10406  SDValue InFlag;
10407  SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
10408  DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InFlag);
10409  InFlag = Chain.getValue(1);
10410  Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX,
10411  X86II::MO_TLSLDM, /*LocalDynamic=*/true);
10412  }
10413 
10414  // Note: the CleanupLocalDynamicTLSPass will remove redundant computations
10415  // of Base.
10416 
10417  // Build x@dtpoff.
10418  unsigned char OperandFlags = X86II::MO_DTPOFF;
10419  unsigned WrapperKind = X86ISD::Wrapper;
10420  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
10421  GA->getValueType(0),
10422  GA->getOffset(), OperandFlags);
10423  SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
10424 
10425  // Add x@dtpoff with the base.
10426  return DAG.getNode(ISD::ADD, dl, PtrVT, Offset, Base);
10427 }
SDValue getValue(unsigned R) const
SDValue getCopyToReg(SDValue Chain, SDLoc dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:486
MO_TLSLDM - Represents the offset into the global offset table at which the module TLS offset resides.
Definition: MipsBaseInfo.h:64
const GlobalValue * getGlobal() const
static bool is64Bit(const ELFYAML::Object &Doc)
Definition: yaml2elf.cpp:463
EVT getValueType(unsigned ResNo) const
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:276
SDValue getTargetGlobalAddress(const GlobalValue *GV, SDLoc DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:433
static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg, unsigned char OperandFlags, bool LocalDynamic=false)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getEntryNode() const
Definition: SelectionDAG.h:327
static SDValue lowerV16I8VectorShuffle ( SDValue  Op,
SDValue  V1,
SDValue  V2,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Generic lowering of v16i8 shuffles.

This is a hybrid strategy to lower v16i8 vectors. It first attempts to detect any complexity reducing interleaving. If that doesn't help, it uses UNPCK to spread the i8 elements across two i16-element vectors, and uses the existing lowering for v8i16 blends on each half, finally PACK-ing them back together.

Definition at line 7668 of file X86ISelLowering.cpp.

7670  {
7671  SDLoc DL(Op);
7672  assert(Op.getSimpleValueType() == MVT::v16i8 && "Bad shuffle type!");
7673  assert(V1.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
7674  assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
7675  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
7676  ArrayRef<int> OrigMask = SVOp->getMask();
7677  assert(OrigMask.size() == 16 && "Unexpected mask size for v16 shuffle!");
7678  int MaskStorage[16] = {
7679  OrigMask[0], OrigMask[1], OrigMask[2], OrigMask[3],
7680  OrigMask[4], OrigMask[5], OrigMask[6], OrigMask[7],
7681  OrigMask[8], OrigMask[9], OrigMask[10], OrigMask[11],
7682  OrigMask[12], OrigMask[13], OrigMask[14], OrigMask[15]};
7683  MutableArrayRef<int> Mask(MaskStorage);
7684  MutableArrayRef<int> LoMask = Mask.slice(0, 8);
7685  MutableArrayRef<int> HiMask = Mask.slice(8, 8);
7686 
7687  // For single-input shuffles, there are some nicer lowering tricks we can use.
7688  if (isSingleInputShuffleMask(Mask)) {
7689  // Check whether we can widen this to an i16 shuffle by duplicating bytes.
7690  // Notably, this handles splat and partial-splat shuffles more efficiently.
7691  // However, it only makes sense if the pre-duplication shuffle simplifies
7692  // things significantly. Currently, this means we need to be able to
7693  // express the pre-duplication shuffle as an i16 shuffle.
7694  //
7695  // FIXME: We should check for other patterns which can be widened into an
7696  // i16 shuffle as well.
7697  auto canWidenViaDuplication = [](ArrayRef<int> Mask) {
7698  for (int i = 0; i < 16; i += 2) {
7699  if (Mask[i] != Mask[i + 1])
7700  return false;
7701  }
7702  return true;
7703  };
7704  auto tryToWidenViaDuplication = [&]() -> SDValue {
7705  if (!canWidenViaDuplication(Mask))
7706  return SDValue();
7707  SmallVector<int, 4> LoInputs;
7708  std::copy_if(Mask.begin(), Mask.end(), std::back_inserter(LoInputs),
7709  [](int M) { return M >= 0 && M < 8; });
7710  std::sort(LoInputs.begin(), LoInputs.end());
7711  LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()),
7712  LoInputs.end());
7713  SmallVector<int, 4> HiInputs;
7714  std::copy_if(Mask.begin(), Mask.end(), std::back_inserter(HiInputs),
7715  [](int M) { return M >= 8; });
7716  std::sort(HiInputs.begin(), HiInputs.end());
7717  HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()),
7718  HiInputs.end());
7719 
7720  bool TargetLo = LoInputs.size() >= HiInputs.size();
7721  ArrayRef<int> InPlaceInputs = TargetLo ? LoInputs : HiInputs;
7722  ArrayRef<int> MovingInputs = TargetLo ? HiInputs : LoInputs;
7723 
7724  int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1};
7726  for (int I : InPlaceInputs) {
7727  PreDupI16Shuffle[I/2] = I/2;
7728  LaneMap[I] = I;
7729  }
7730  int j = TargetLo ? 0 : 4, je = j + 4;
7731  for (int i = 0, ie = MovingInputs.size(); i < ie; ++i) {
7732  // Check if j is already a shuffle of this input. This happens when
7733  // there are two adjacent bytes after we move the low one.
7734  if (PreDupI16Shuffle[j] != MovingInputs[i] / 2) {
7735  // If we haven't yet mapped the input, search for a slot into which
7736  // we can map it.
7737  while (j < je && PreDupI16Shuffle[j] != -1)
7738  ++j;
7739 
7740  if (j == je)
7741  // We can't place the inputs into a single half with a simple i16 shuffle, so bail.
7742  return SDValue();
7743 
7744  // Map this input with the i16 shuffle.
7745  PreDupI16Shuffle[j] = MovingInputs[i] / 2;
7746  }
7747 
7748  // Update the lane map based on the mapping we ended up with.
7749  LaneMap[MovingInputs[i]] = 2 * j + MovingInputs[i] % 2;
7750  }
7751  V1 = DAG.getNode(
7752  ISD::BITCAST, DL, MVT::v16i8,
7753  DAG.getVectorShuffle(MVT::v8i16, DL,
7754  DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1),
7755  DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle));
7756 
7757  // Unpack the bytes to form the i16s that will be shuffled into place.
7758  V1 = DAG.getNode(TargetLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
7759  MVT::v16i8, V1, V1);
7760 
7761  int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
7762  for (int i = 0; i < 16; i += 2) {
7763  if (Mask[i] != -1)
7764  PostDupI16Shuffle[i / 2] = LaneMap[Mask[i]] - (TargetLo ? 0 : 8);
7765  assert(PostDupI16Shuffle[i / 2] < 8 && "Invalid v8 shuffle mask!");
7766  }
7767  return DAG.getNode(
7768  ISD::BITCAST, DL, MVT::v16i8,
7769  DAG.getVectorShuffle(MVT::v8i16, DL,
7770  DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1),
7771  DAG.getUNDEF(MVT::v8i16), PostDupI16Shuffle));
7772  };
7773  if (SDValue V = tryToWidenViaDuplication())
7774  return V;
7775  }
7776 
7777  // Check whether an interleaving lowering is likely to be more efficient.
7778  // This isn't perfect but it is a strong heuristic that tends to work well on
7779  // the kinds of shuffles that show up in practice.
7780  //
7781  // FIXME: We need to handle other interleaving widths (i16, i32, ...).
7782  if (shouldLowerAsInterleaving(Mask)) {
7783  // FIXME: Figure out whether we should pack these into the low or high
7784  // halves.
7785 
7786  int EMask[16], OMask[16];
7787  for (int i = 0; i < 8; ++i) {
7788  EMask[i] = Mask[2*i];
7789  OMask[i] = Mask[2*i + 1];
7790  EMask[i + 8] = -1;
7791  OMask[i + 8] = -1;
7792  }
7793 
7794  SDValue Evens = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, EMask);
7795  SDValue Odds = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, OMask);
7796 
7797  return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, Evens, Odds);
7798  }
7799 
7800  int V1LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
7801  int V1HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
7802  int V2LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
7803  int V2HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
7804 
7805  auto buildBlendMasks = [](MutableArrayRef<int> HalfMask,
7806  MutableArrayRef<int> V1HalfBlendMask,
7807  MutableArrayRef<int> V2HalfBlendMask) {
7808  for (int i = 0; i < 8; ++i)
7809  if (HalfMask[i] >= 0 && HalfMask[i] < 16) {
7810  V1HalfBlendMask[i] = HalfMask[i];
7811  HalfMask[i] = i;
7812  } else if (HalfMask[i] >= 16) {
7813  V2HalfBlendMask[i] = HalfMask[i] - 16;
7814  HalfMask[i] = i + 8;
7815  }
7816  };
7817  buildBlendMasks(LoMask, V1LoBlendMask, V2LoBlendMask);
7818  buildBlendMasks(HiMask, V1HiBlendMask, V2HiBlendMask);
7819 
7820  SDValue Zero = getZeroVector(MVT::v8i16, Subtarget, DAG, DL);
7821 
7822  auto buildLoAndHiV8s = [&](SDValue V, MutableArrayRef<int> LoBlendMask,
7823  MutableArrayRef<int> HiBlendMask) {
7824  SDValue V1, V2;
7825  // Check if any of the odd lanes in the v16i8 are used. If not, we can mask
7826  // them out and avoid using UNPCK{L,H} to extract the elements of V as
7827  // i16s.
7828  if (std::none_of(LoBlendMask.begin(), LoBlendMask.end(),
7829  [](int M) { return M >= 0 && M % 2 == 1; }) &&
7830  std::none_of(HiBlendMask.begin(), HiBlendMask.end(),
7831  [](int M) { return M >= 0 && M % 2 == 1; })) {
7832  // Use a mask to drop the high bytes.
7833  V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V);
7834  V1 = DAG.getNode(ISD::AND, DL, MVT::v8i16, V1,
7835  DAG.getConstant(0x00FF, MVT::v8i16));
7836 
7837  // This will be a single vector shuffle instead of a blend so nuke V2.
7838  V2 = DAG.getUNDEF(MVT::v8i16);
7839 
7840  // Squash the masks to point directly into V1.
7841  for (int &M : LoBlendMask)
7842  if (M >= 0)
7843  M /= 2;
7844  for (int &M : HiBlendMask)
7845  if (M >= 0)
7846  M /= 2;
7847  } else {
7848  // Otherwise just unpack the low half of V into V1 and the high half into
7849  // V2 so that we can blend them as i16s.
7850  V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
7851  DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero));
7852  V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
7853  DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero));
7854  }
7855 
7856  SDValue BlendedLo = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, LoBlendMask);
7857  SDValue BlendedHi = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, HiBlendMask);
7858  return std::make_pair(BlendedLo, BlendedHi);
7859  };
7860  SDValue V1Lo, V1Hi, V2Lo, V2Hi;
7861  std::tie(V1Lo, V1Hi) = buildLoAndHiV8s(V1, V1LoBlendMask, V1HiBlendMask);
7862  std::tie(V2Lo, V2Hi) = buildLoAndHiV8s(V2, V2LoBlendMask, V2HiBlendMask);
7863 
7864  SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, V1Lo, V2Lo, LoMask);
7865  SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, V1Hi, V2Hi, HiMask);
7866 
7867  return DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, LoV, HiV);
7868 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG, SDLoc dl)
static bool isSingleInputShuffleMask(ArrayRef< int > Mask)
Helper function to classify a mask as a single-input mask.
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:109
assert(Globals.size() > 1)
iterator erase(iterator I)
Definition: SmallVector.h:450
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
MutableArrayRef< T > slice(unsigned N) const
slice(n) - Chop off the first N elements of the array.
Definition: ArrayRef.h:250
static bool shouldLowerAsInterleaving(ArrayRef< int > Mask)
Detect whether the mask pattern should be lowered through interleaving.
#define I(x, y, z)
Definition: MD5.cpp:54
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static SDValue lowerV2F64VectorShuffle ( SDValue  Op,
SDValue  V1,
SDValue  V2,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Handle lowering of 2-lane 64-bit floating point shuffles.

This is the basis function for the 2-lane 64-bit shuffles as we have full support for floating point shuffles but not integer shuffles. These instructions will incur a domain crossing penalty on some chips though so it is better to avoid lowering through this for integer vectors where possible.

Definition at line 6951 of file X86ISelLowering.cpp.

6953  {
6954  SDLoc DL(Op);
6955  assert(Op.getSimpleValueType() == MVT::v2f64 && "Bad shuffle type!");
6956  assert(V1.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
6957  assert(V2.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
6958  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
6959  ArrayRef<int> Mask = SVOp->getMask();
6960  assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
6961 
6962  if (isSingleInputShuffleMask(Mask)) {
6963  // Straight shuffle of a single input vector. Simulate this by using the
 6964  // single input as both of the "inputs" to this instruction.
6965  unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1);
6966  return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V1,
6967  DAG.getConstant(SHUFPDMask, MVT::i8));
6968  }
6969  assert(Mask[0] >= 0 && Mask[0] < 2 && "Non-canonicalized blend!");
6970  assert(Mask[1] >= 2 && "Non-canonicalized blend!");
6971 
6972  unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
6973  return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V2,
6974  DAG.getConstant(SHUFPDMask, MVT::i8));
6975 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
static bool isSingleInputShuffleMask(ArrayRef< int > Mask)
Helper function to classify a mask as a single-input mask.
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
assert(Globals.size() > 1)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static SDValue lowerV2I64VectorShuffle ( SDValue  Op,
SDValue  V1,
SDValue  V2,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Handle lowering of 2-lane 64-bit integer shuffles.

Tries to lower a 2-lane 64-bit shuffle using shuffle operations provided by the integer unit to minimize domain crossing penalties. However, for blends it falls back to the floating point shuffle operation with appropriate bit casting.

Definition at line 6983 of file X86ISelLowering.cpp.

6985  {
6986  SDLoc DL(Op);
6987  assert(Op.getSimpleValueType() == MVT::v2i64 && "Bad shuffle type!");
6988  assert(V1.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
6989  assert(V2.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
6990  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
6991  ArrayRef<int> Mask = SVOp->getMask();
6992  assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
6993 
6994  if (isSingleInputShuffleMask(Mask)) {
6995  // Straight shuffle of a single input vector. For everything from SSE2
6996  // onward this has a single fast instruction with no scary immediates.
6997  // We have to map the mask as it is actually a v4i32 shuffle instruction.
6998  V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V1);
6999  int WidenedMask[4] = {
7000  std::max(Mask[0], 0) * 2, std::max(Mask[0], 0) * 2 + 1,
7001  std::max(Mask[1], 0) * 2, std::max(Mask[1], 0) * 2 + 1};
7002  return DAG.getNode(
7003  ISD::BITCAST, DL, MVT::v2i64,
7004  DAG.getNode(X86ISD::PSHUFD, SDLoc(Op), MVT::v4i32, V1,
7005  getV4X86ShuffleImm8ForMask(WidenedMask, DAG)));
7006  }
7007 
7008  // We implement this with SHUFPD which is pretty lame because it will likely
7009  // incur 2 cycles of stall for integer vectors on Nehalem and older chips.
7010  // However, all the alternatives are still more cycles and newer chips don't
7011  // have this problem. It would be really nice if x86 had better shuffles here.
7012  V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V1);
7013  V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V2);
7014  return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
7015  DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask));
7016 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
static bool isSingleInputShuffleMask(ArrayRef< int > Mask)
Helper function to classify a mask as a single-input mask.
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
assert(Globals.size() > 1)
static SDValue getV4X86ShuffleImm8ForMask(ArrayRef< int > Mask, SelectionDAG &DAG)
Get a 4-lane 8-bit shuffle immediate for a mask.
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static SDValue lowerV4F32VectorShuffle ( SDValue  Op,
SDValue  V1,
SDValue  V2,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Lower 4-lane 32-bit floating point shuffles.

Uses instructions exclusively from the floating point unit to minimize domain crossing penalties, as these are sufficient to implement all v4f32 shuffles.

Definition at line 7023 of file X86ISelLowering.cpp.

7025  {
7026  SDLoc DL(Op);
7027  assert(Op.getSimpleValueType() == MVT::v4f32 && "Bad shuffle type!");
7028  assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
7029  assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
7030  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
7031  ArrayRef<int> Mask = SVOp->getMask();
7032  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
7033 
7034  SDValue LowV = V1, HighV = V2;
7035  int NewMask[4] = {Mask[0], Mask[1], Mask[2], Mask[3]};
7036 
7037  int NumV2Elements =
7038  std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; });
7039 
7040  if (NumV2Elements == 0)
7041  // Straight shuffle of a single input vector. We pass the input vector to
7042  // both operands to simulate this with a SHUFPS.
7043  return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1,
7044  getV4X86ShuffleImm8ForMask(Mask, DAG));
7045 
7046  if (NumV2Elements == 1) {
7047  int V2Index =
7048  std::find_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }) -
7049  Mask.begin();
7050  // Compute the index adjacent to V2Index and in the same half by toggling
7051  // the low bit.
7052  int V2AdjIndex = V2Index ^ 1;
7053 
7054  if (Mask[V2AdjIndex] == -1) {
7055  // Handles all the cases where we have a single V2 element and an undef.
7056  // This will only ever happen in the high lanes because we commute the
7057  // vector otherwise.
7058  if (V2Index < 2)
7059  std::swap(LowV, HighV);
7060  NewMask[V2Index] -= 4;
7061  } else {
7062  // Handle the case where the V2 element ends up adjacent to a V1 element.
7063  // To make this work, blend them together as the first step.
7064  int V1Index = V2AdjIndex;
7065  int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0};
7066  V2 = DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V2, V1,
7067  getV4X86ShuffleImm8ForMask(BlendMask, DAG));
7068 
7069  // Now proceed to reconstruct the final blend as we have the necessary
7070  // high or low half formed.
7071  if (V2Index < 2) {
7072  LowV = V2;
7073  HighV = V1;
7074  } else {
7075  HighV = V2;
7076  }
7077  NewMask[V1Index] = 2; // We put the V1 element in V2[2].
7078  NewMask[V2Index] = 0; // We shifted the V2 element into V2[0].
7079  }
7080  } else if (NumV2Elements == 2) {
7081  if (Mask[0] < 4 && Mask[1] < 4) {
7082  // Handle the easy case where we have V1 in the low lanes and V2 in the
7083  // high lanes. We never see this reversed because we sort the shuffle.
7084  NewMask[2] -= 4;
7085  NewMask[3] -= 4;
7086  } else {
7087  // We have a mixture of V1 and V2 in both low and high lanes. Rather than
7088  // trying to place elements directly, just blend them and set up the final
7089  // shuffle to place them.
7090 
7091  // The first two blend mask elements are for V1, the second two are for
7092  // V2.
7093  int BlendMask[4] = {Mask[0] < 4 ? Mask[0] : Mask[1],
7094  Mask[2] < 4 ? Mask[2] : Mask[3],
7095  (Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4,
7096  (Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4};
7097  V1 = DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V2,
7098  getV4X86ShuffleImm8ForMask(BlendMask, DAG));
7099 
7100  // Now we do a normal shuffle of V1 by giving V1 as both operands to
7101  // a blend.
7102  LowV = HighV = V1;
7103  NewMask[0] = Mask[0] < 4 ? 0 : 2;
7104  NewMask[1] = Mask[0] < 4 ? 2 : 0;
7105  NewMask[2] = Mask[2] < 4 ? 1 : 3;
7106  NewMask[3] = Mask[2] < 4 ? 3 : 1;
7107  }
7108  }
7109  return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, LowV, HighV,
7110  getV4X86ShuffleImm8ForMask(NewMask, DAG));
7111 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
assert(Globals.size() > 1)
static SDValue getV4X86ShuffleImm8ForMask(ArrayRef< int > Mask, SelectionDAG &DAG)
Get a 4-lane 8-bit shuffle immediate for a mask.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:590
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static SDValue lowerV4I32VectorShuffle ( SDValue  Op,
SDValue  V1,
SDValue  V2,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Lower 4-lane i32 vector shuffles.

We try to handle these with integer-domain shuffles where we can, but for blends we use the floating point domain blend instructions.

Definition at line 7117 of file X86ISelLowering.cpp.

7119  {
7120  SDLoc DL(Op);
7121  assert(Op.getSimpleValueType() == MVT::v4i32 && "Bad shuffle type!");
7122  assert(V1.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
7123  assert(V2.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
7124  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
7125  ArrayRef<int> Mask = SVOp->getMask();
7126  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
7127 
7128  if (isSingleInputShuffleMask(Mask))
7129  // Straight shuffle of a single input vector. For everything from SSE2
7130  // onward this has a single fast instruction with no scary immediates.
7131  return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
7132  getV4X86ShuffleImm8ForMask(Mask, DAG));
7133 
7134  // We implement this with SHUFPS because it can blend from two vectors.
7135  // Because we're going to eventually use SHUFPS, we use SHUFPS even to build
 7136  // up the inputs, bypassing domain shift penalties that we would incur if we
7137  // directly used PSHUFD on Nehalem and older. For newer chips, this isn't
7138  // relevant.
7139  return DAG.getNode(ISD::BITCAST, DL, MVT::v4i32,
7140  DAG.getVectorShuffle(
7141  MVT::v4f32, DL,
7142  DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, V1),
7143  DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, V2), Mask));
7144 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
static bool isSingleInputShuffleMask(ArrayRef< int > Mask)
Helper function to classify a mask as a single-input mask.
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
assert(Globals.size() > 1)
static SDValue getV4X86ShuffleImm8ForMask(ArrayRef< int > Mask, SelectionDAG &DAG)
Get a 4-lane 8-bit shuffle immediate for a mask.
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static SDValue lowerV8I16BasicBlendVectorShuffle ( SDLoc  DL,
SDValue  V1,
SDValue  V2,
MutableArrayRef< int >  Mask,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Blend two v8i16 vectors using a naive unpack strategy.

This strategy only works when the inputs from each vector fit into a single half of that vector, and generally there are not so many inputs as to leave the in-place shuffles required highly constrained (and thus expensive). It shifts all the inputs into a single side of both input vectors and then uses an unpack to interleave these inputs in a single vector. At that point, we will fall back on the generic single input shuffle lowering.

Definition at line 7474 of file X86ISelLowering.cpp.

7478  {
7479  assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
7480  assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
7481  SmallVector<int, 3> LoV1Inputs, HiV1Inputs, LoV2Inputs, HiV2Inputs;
7482  for (int i = 0; i < 8; ++i)
7483  if (Mask[i] >= 0 && Mask[i] < 4)
7484  LoV1Inputs.push_back(i);
7485  else if (Mask[i] >= 4 && Mask[i] < 8)
7486  HiV1Inputs.push_back(i);
7487  else if (Mask[i] >= 8 && Mask[i] < 12)
7488  LoV2Inputs.push_back(i);
7489  else if (Mask[i] >= 12)
7490  HiV2Inputs.push_back(i);
7491 
7492  int NumV1Inputs = LoV1Inputs.size() + HiV1Inputs.size();
7493  int NumV2Inputs = LoV2Inputs.size() + HiV2Inputs.size();
7494  (void)NumV1Inputs;
7495  (void)NumV2Inputs;
7496  assert(NumV1Inputs > 0 && NumV1Inputs <= 3 && "At most 3 inputs supported");
7497  assert(NumV2Inputs > 0 && NumV2Inputs <= 3 && "At most 3 inputs supported");
7498  assert(NumV1Inputs + NumV2Inputs <= 4 && "At most 4 combined inputs");
7499 
7500  bool MergeFromLo = LoV1Inputs.size() + LoV2Inputs.size() >=
7501  HiV1Inputs.size() + HiV2Inputs.size();
7502 
7503  auto moveInputsToHalf = [&](SDValue V, ArrayRef<int> LoInputs,
7504  ArrayRef<int> HiInputs, bool MoveToLo,
7505  int MaskOffset) {
7506  ArrayRef<int> GoodInputs = MoveToLo ? LoInputs : HiInputs;
7507  ArrayRef<int> BadInputs = MoveToLo ? HiInputs : LoInputs;
7508  if (BadInputs.empty())
7509  return V;
7510 
7511  int MoveMask[] = {-1, -1, -1, -1, -1, -1, -1, -1};
7512  int MoveOffset = MoveToLo ? 0 : 4;
7513 
7514  if (GoodInputs.empty()) {
7515  for (int BadInput : BadInputs) {
7516  MoveMask[Mask[BadInput] % 4 + MoveOffset] = Mask[BadInput] - MaskOffset;
7517  Mask[BadInput] = Mask[BadInput] % 4 + MoveOffset + MaskOffset;
7518  }
7519  } else {
7520  if (GoodInputs.size() == 2) {
7521  // If the low inputs are spread across two dwords, pack them into
7522  // a single dword.
7523  MoveMask[Mask[GoodInputs[0]] % 2 + MoveOffset] =
7524  Mask[GoodInputs[0]] - MaskOffset;
7525  MoveMask[Mask[GoodInputs[1]] % 2 + MoveOffset] =
7526  Mask[GoodInputs[1]] - MaskOffset;
7527  Mask[GoodInputs[0]] = Mask[GoodInputs[0]] % 2 + MoveOffset + MaskOffset;
7528  Mask[GoodInputs[1]] = Mask[GoodInputs[0]] % 2 + MoveOffset + MaskOffset;
7529  } else {
7530  // Otherwise pin the low inputs.
7531  for (int GoodInput : GoodInputs)
7532  MoveMask[Mask[GoodInput] - MaskOffset] = Mask[GoodInput] - MaskOffset;
7533  }
7534 
7535  int MoveMaskIdx =
7536  std::find(std::begin(MoveMask) + MoveOffset, std::end(MoveMask), -1) -
7537  std::begin(MoveMask);
7538  assert(MoveMaskIdx >= MoveOffset && "Established above");
7539 
7540  if (BadInputs.size() == 2) {
7541  assert(MoveMask[MoveMaskIdx] == -1 && "Expected empty slot");
7542  assert(MoveMask[MoveMaskIdx + 1] == -1 && "Expected empty slot");
7543  MoveMask[MoveMaskIdx + Mask[BadInputs[0]] % 2] =
7544  Mask[BadInputs[0]] - MaskOffset;
7545  MoveMask[MoveMaskIdx + Mask[BadInputs[1]] % 2] =
7546  Mask[BadInputs[1]] - MaskOffset;
7547  Mask[BadInputs[0]] = MoveMaskIdx + Mask[BadInputs[0]] % 2 + MaskOffset;
7548  Mask[BadInputs[1]] = MoveMaskIdx + Mask[BadInputs[1]] % 2 + MaskOffset;
7549  } else {
7550  assert(BadInputs.size() == 1 && "All sizes handled");
7551  MoveMask[MoveMaskIdx] = Mask[BadInputs[0]] - MaskOffset;
7552  Mask[BadInputs[0]] = MoveMaskIdx + MaskOffset;
7553  }
7554  }
7555 
7556  return DAG.getVectorShuffle(MVT::v8i16, DL, V, DAG.getUNDEF(MVT::v8i16),
7557  MoveMask);
7558  };
7559  V1 = moveInputsToHalf(V1, LoV1Inputs, HiV1Inputs, MergeFromLo,
7560  /*MaskOffset*/ 0);
7561  V2 = moveInputsToHalf(V2, LoV2Inputs, HiV2Inputs, MergeFromLo,
7562  /*MaskOffset*/ 8);
7563 
7564  // FIXME: Select an interleaving of the merge of V1 and V2 that minimizes
7565  // cross-half traffic in the final shuffle.
7566 
7567  // Munge the mask to be a single-input mask after the unpack merges the
7568  // results.
7569  for (int &M : Mask)
7570  if (M != -1)
7571  M = 2 * (M % 4) + (M / 8);
7572 
7573  return DAG.getVectorShuffle(
7574  MVT::v8i16, DL, DAG.getNode(MergeFromLo ? X86ISD::UNPCKL : X86ISD::UNPCKH,
7575  DL, MVT::v8i16, V1, V2),
7576  DAG.getUNDEF(MVT::v8i16), Mask);
7577 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
void push_back(const T &Elt)
Definition: SmallVector.h:225
const_iterator end(StringRef path)
Get end iterator over path.
const_iterator begin(StringRef path)
Get begin iterator over path.
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:109
assert(Globals.size() > 1)
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:104
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static SDValue lowerV8I16SingleInputVectorShuffle ( SDLoc  DL,
SDValue  V,
MutableArrayRef< int >  Mask,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Lowering of single-input v8i16 shuffles is the cornerstone of SSE2 shuffle lowering, and the most complex part.

The lowering strategy is to try to form pairs of input lanes which are targeted at the same half of the final vector, and then use a dword shuffle to place them onto the right half, and finally unpack the paired lanes into their final position.

The exact breakdown of how to form these dword pairs and align them on the correct sides is really tricky. See the comments within the function for more of the details.

Definition at line 7157 of file X86ISelLowering.cpp.

7159  {
7160  assert(V.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
7161  MutableArrayRef<int> LoMask = Mask.slice(0, 4);
7162  MutableArrayRef<int> HiMask = Mask.slice(4, 4);
7163 
7164  SmallVector<int, 4> LoInputs;
7165  std::copy_if(LoMask.begin(), LoMask.end(), std::back_inserter(LoInputs),
7166  [](int M) { return M >= 0; });
7167  std::sort(LoInputs.begin(), LoInputs.end());
7168  LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()), LoInputs.end());
7169  SmallVector<int, 4> HiInputs;
7170  std::copy_if(HiMask.begin(), HiMask.end(), std::back_inserter(HiInputs),
7171  [](int M) { return M >= 0; });
7172  std::sort(HiInputs.begin(), HiInputs.end());
7173  HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), HiInputs.end());
7174  int NumLToL =
7175  std::lower_bound(LoInputs.begin(), LoInputs.end(), 4) - LoInputs.begin();
7176  int NumHToL = LoInputs.size() - NumLToL;
7177  int NumLToH =
7178  std::lower_bound(HiInputs.begin(), HiInputs.end(), 4) - HiInputs.begin();
7179  int NumHToH = HiInputs.size() - NumLToH;
7180  MutableArrayRef<int> LToLInputs(LoInputs.data(), NumLToL);
7181  MutableArrayRef<int> LToHInputs(HiInputs.data(), NumLToH);
7182  MutableArrayRef<int> HToLInputs(LoInputs.data() + NumLToL, NumHToL);
7183  MutableArrayRef<int> HToHInputs(HiInputs.data() + NumLToH, NumHToH);
7184 
7185  // Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all
7186  // such inputs we can swap two of the dwords across the half mark and end up
7187  // with <=2 inputs to each half in each half. Once there, we can fall through
7188  // to the generic code below. For example:
7189  //
7190  // Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h]
7191  // Mask: [0, 1, 2, 7, 4, 5, 6, 3] -----------------> [0, 1, 4, 7, 2, 3, 6, 5]
7192  //
7193  // Before we had 3-1 in the low half and 3-1 in the high half. Afterward, 2-2
7194  // and 2-2.
7195  auto balanceSides = [&](ArrayRef<int> ThreeInputs, int OneInput,
7196  int ThreeInputHalfSum, int OneInputHalfOffset) {
7197  // Compute the index of dword with only one word among the three inputs in
7198  // a half by taking the sum of the half with three inputs and subtracting
7199  // the sum of the actual three inputs. The difference is the remaining
7200  // slot.
7201  int DWordA = (ThreeInputHalfSum -
7202  std::accumulate(ThreeInputs.begin(), ThreeInputs.end(), 0)) /
7203  2;
7204  int DWordB = OneInputHalfOffset / 2 + (OneInput / 2 + 1) % 2;
7205 
7206  int PSHUFDMask[] = {0, 1, 2, 3};
7207  PSHUFDMask[DWordA] = DWordB;
7208  PSHUFDMask[DWordB] = DWordA;
7209  V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
7210  DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
7211  DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V),
7212  getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)));
7213 
7214  // Adjust the mask to match the new locations of A and B.
7215  for (int &M : Mask)
7216  if (M != -1 && M/2 == DWordA)
7217  M = 2 * DWordB + M % 2;
7218  else if (M != -1 && M/2 == DWordB)
7219  M = 2 * DWordA + M % 2;
7220 
7221  // Recurse back into this routine to re-compute state now that this isn't
7222  // a 3 and 1 problem.
7223  return DAG.getVectorShuffle(MVT::v8i16, DL, V, DAG.getUNDEF(MVT::v8i16),
7224  Mask);
7225  };
7226  if (NumLToL == 3 && NumHToL == 1)
7227  return balanceSides(LToLInputs, HToLInputs[0], 0 + 1 + 2 + 3, 4);
7228  else if (NumLToL == 1 && NumHToL == 3)
7229  return balanceSides(HToLInputs, LToLInputs[0], 4 + 5 + 6 + 7, 0);
7230  else if (NumLToH == 1 && NumHToH == 3)
7231  return balanceSides(HToHInputs, LToHInputs[0], 4 + 5 + 6 + 7, 0);
7232  else if (NumLToH == 3 && NumHToH == 1)
7233  return balanceSides(LToHInputs, HToHInputs[0], 0 + 1 + 2 + 3, 4);
7234 
7235  // At this point there are at most two inputs to the low and high halves from
7236  // each half. That means the inputs can always be grouped into dwords and
7237  // those dwords can then be moved to the correct half with a dword shuffle.
7238  // We use at most one low and one high word shuffle to collect these paired
7239  // inputs into dwords, and finally a dword shuffle to place them.
7240  int PSHUFLMask[4] = {-1, -1, -1, -1};
7241  int PSHUFHMask[4] = {-1, -1, -1, -1};
7242  int PSHUFDMask[4] = {-1, -1, -1, -1};
7243 
7244  // First fix the masks for all the inputs that are staying in their
7245  // original halves. This will then dictate the targets of the cross-half
7246  // shuffles.
7247  auto fixInPlaceInputs = [&PSHUFDMask](
7248  ArrayRef<int> InPlaceInputs, MutableArrayRef<int> SourceHalfMask,
7249  MutableArrayRef<int> HalfMask, int HalfOffset) {
7250  if (InPlaceInputs.empty())
7251  return;
7252  if (InPlaceInputs.size() == 1) {
7253  SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
7254  InPlaceInputs[0] - HalfOffset;
7255  PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2;
7256  return;
7257  }
7258 
7259  assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!");
7260  SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
7261  InPlaceInputs[0] - HalfOffset;
7262  // Put the second input next to the first so that they are packed into
7263  // a dword. We find the adjacent index by toggling the low bit.
7264  int AdjIndex = InPlaceInputs[0] ^ 1;
7265  SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset;
7266  std::replace(HalfMask.begin(), HalfMask.end(), InPlaceInputs[1], AdjIndex);
7267  PSHUFDMask[AdjIndex / 2] = AdjIndex / 2;
7268  };
7269  if (!HToLInputs.empty())
7270  fixInPlaceInputs(LToLInputs, PSHUFLMask, LoMask, 0);
7271  if (!LToHInputs.empty())
7272  fixInPlaceInputs(HToHInputs, PSHUFHMask, HiMask, 4);
7273 
7274  // Now gather the cross-half inputs and place them into a free dword of
7275  // their target half.
7276  // FIXME: This operation could almost certainly be simplified dramatically to
7277  // look more like the 3-1 fixing operation.
7278  auto moveInputsToRightHalf = [&PSHUFDMask](
7279  MutableArrayRef<int> IncomingInputs, ArrayRef<int> ExistingInputs,
7280  MutableArrayRef<int> SourceHalfMask, MutableArrayRef<int> HalfMask,
7281  int SourceOffset, int DestOffset) {
7282  auto isWordClobbered = [](ArrayRef<int> SourceHalfMask, int Word) {
7283  return SourceHalfMask[Word] != -1 && SourceHalfMask[Word] != Word;
7284  };
7285  auto isDWordClobbered = [&isWordClobbered](ArrayRef<int> SourceHalfMask,
7286  int Word) {
7287  int LowWord = Word & ~1;
7288  int HighWord = Word | 1;
7289  return isWordClobbered(SourceHalfMask, LowWord) ||
7290  isWordClobbered(SourceHalfMask, HighWord);
7291  };
7292 
7293  if (IncomingInputs.empty())
7294  return;
7295 
7296  if (ExistingInputs.empty()) {
7297  // Map any dwords with inputs from them into the right half.
7298  for (int Input : IncomingInputs) {
7299  // If the source half mask maps over the inputs, turn those into
7300  // swaps and use the swapped lane.
7301  if (isWordClobbered(SourceHalfMask, Input - SourceOffset)) {
7302  if (SourceHalfMask[SourceHalfMask[Input - SourceOffset]] == -1) {
7303  SourceHalfMask[SourceHalfMask[Input - SourceOffset]] =
7304  Input - SourceOffset;
7305  // We have to swap the uses in our half mask in one sweep.
7306  for (int &M : HalfMask)
7307  if (M == SourceHalfMask[Input - SourceOffset])
7308  M = Input;
7309  else if (M == Input)
7310  M = SourceHalfMask[Input - SourceOffset] + SourceOffset;
7311  } else {
7312  assert(SourceHalfMask[SourceHalfMask[Input - SourceOffset]] ==
7313  Input - SourceOffset &&
7314  "Previous placement doesn't match!");
7315  }
7316  // Note that this correctly re-maps both when we do a swap and when
7317  // we observe the other side of the swap above. We rely on that to
7318  // avoid swapping the members of the input list directly.
7319  Input = SourceHalfMask[Input - SourceOffset] + SourceOffset;
7320  }
7321 
7322  // Map the input's dword into the correct half.
7323  if (PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] == -1)
7324  PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] = Input / 2;
7325  else
7326  assert(PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] ==
7327  Input / 2 &&
7328  "Previous placement doesn't match!");
7329  }
7330 
7331  // And just directly shift any other-half mask elements to be same-half
7332  // as we will have mirrored the dword containing the element into the
7333  // same position within that half.
7334  for (int &M : HalfMask)
7335  if (M >= SourceOffset && M < SourceOffset + 4) {
7336  M = M - SourceOffset + DestOffset;
7337  assert(M >= 0 && "This should never wrap below zero!");
7338  }
7339  return;
7340  }
7341 
7342  // Ensure we have the input in a viable dword of its current half. This
7343  // is particularly tricky because the original position may be clobbered
7344  // by inputs being moved and *staying* in that half.
7345  if (IncomingInputs.size() == 1) {
7346  if (isWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
7347  int InputFixed = std::find(std::begin(SourceHalfMask),
7348  std::end(SourceHalfMask), -1) -
7349  std::begin(SourceHalfMask) + SourceOffset;
7350  SourceHalfMask[InputFixed - SourceOffset] =
7351  IncomingInputs[0] - SourceOffset;
7352  std::replace(HalfMask.begin(), HalfMask.end(), IncomingInputs[0],
7353  InputFixed);
7354  IncomingInputs[0] = InputFixed;
7355  }
7356  } else if (IncomingInputs.size() == 2) {
7357  if (IncomingInputs[0] / 2 != IncomingInputs[1] / 2 ||
7358  isDWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
7359  int SourceDWordBase = !isDWordClobbered(SourceHalfMask, 0) ? 0 : 2;
7360  assert(!isDWordClobbered(SourceHalfMask, SourceDWordBase) &&
7361  "Not all dwords can be clobbered!");
7362  SourceHalfMask[SourceDWordBase] = IncomingInputs[0] - SourceOffset;
7363  SourceHalfMask[SourceDWordBase + 1] = IncomingInputs[1] - SourceOffset;
7364  for (int &M : HalfMask)
7365  if (M == IncomingInputs[0])
7366  M = SourceDWordBase + SourceOffset;
7367  else if (M == IncomingInputs[1])
7368  M = SourceDWordBase + 1 + SourceOffset;
7369  IncomingInputs[0] = SourceDWordBase + SourceOffset;
7370  IncomingInputs[1] = SourceDWordBase + 1 + SourceOffset;
7371  }
7372  } else {
7373  llvm_unreachable("Unhandled input size!");
7374  }
7375 
7376  // Now hoist the DWord down to the right half.
7377  int FreeDWord = (PSHUFDMask[DestOffset / 2] == -1 ? 0 : 1) + DestOffset / 2;
7378  assert(PSHUFDMask[FreeDWord] == -1 && "DWord not free");
7379  PSHUFDMask[FreeDWord] = IncomingInputs[0] / 2;
7380  for (int Input : IncomingInputs)
7381  std::replace(HalfMask.begin(), HalfMask.end(), Input,
7382  FreeDWord * 2 + Input % 2);
7383  };
7384  moveInputsToRightHalf(HToLInputs, LToLInputs, PSHUFHMask, LoMask,
7385  /*SourceOffset*/ 4, /*DestOffset*/ 0);
7386  moveInputsToRightHalf(LToHInputs, HToHInputs, PSHUFLMask, HiMask,
7387  /*SourceOffset*/ 0, /*DestOffset*/ 4);
7388 
7389  // Now enact all the shuffles we've computed to move the inputs into their
7390  // target half.
7391  if (!isNoopShuffleMask(PSHUFLMask))
7392  V = DAG.getNode(X86ISD::PSHUFLW, DL, MVT::v8i16, V,
7393  getV4X86ShuffleImm8ForMask(PSHUFLMask, DAG));
7394  if (!isNoopShuffleMask(PSHUFHMask))
7395  V = DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, V,
7396  getV4X86ShuffleImm8ForMask(PSHUFHMask, DAG));
7397  if (!isNoopShuffleMask(PSHUFDMask))
7398  V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
7399  DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
7400  DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V),
7401  getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)));
7402 
7403  // At this point, each half should contain all its inputs, and we can then
7404  // just shuffle them into their final position.
7405  assert(std::count_if(LoMask.begin(), LoMask.end(),
7406  [](int M) { return M >= 4; }) == 0 &&
7407  "Failed to lift all the high half inputs to the low mask!");
7408  assert(std::count_if(HiMask.begin(), HiMask.end(),
7409  [](int M) { return M >= 0 && M < 4; }) == 0 &&
7410  "Failed to lift all the low half inputs to the high mask!");
7411 
7412  // Do a half shuffle for the low mask.
7413  if (!isNoopShuffleMask(LoMask))
7414  V = DAG.getNode(X86ISD::PSHUFLW, DL, MVT::v8i16, V,
7415  getV4X86ShuffleImm8ForMask(LoMask, DAG));
7416 
7417  // Do a half shuffle with the high mask after shifting its values down.
7418  for (int &M : HiMask)
7419  if (M >= 0)
7420  M -= 4;
7421  if (!isNoopShuffleMask(HiMask))
7422  V = DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, V,
7423  getV4X86ShuffleImm8ForMask(HiMask, DAG));
7424 
7425  return V;
7426 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
const_iterator end(StringRef path)
Get end iterator over path.
iterator end() const
end - Get an iterator to the end of the array.
Definition: ArrayRef.h:98
const_iterator begin(StringRef path)
Get begin iterator over path.
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
iterator begin() const
Definition: ArrayRef.h:231
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:109
assert(Globals.size() > 1)
static bool isNoopShuffleMask(ArrayRef< int > Mask)
Tiny helper function to identify a no-op mask.
static SDValue getV4X86ShuffleImm8ForMask(ArrayRef< int > Mask, SelectionDAG &DAG)
Get a 4-lane 8-bit shuffle immediate for a mask.
iterator begin() const
begin - Get an iterator to the start of the array.
Definition: ArrayRef.h:97
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:104
iterator erase(iterator I)
Definition: SmallVector.h:450
static cl::opt< std::string > Input(cl::Positional, cl::desc("<input>"), cl::init("-"))
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
MutableArrayRef< T > slice(unsigned N) const
slice(n) - Chop off the first N elements of the array.
Definition: ArrayRef.h:250
pointer data()
data - Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:136
iterator end() const
Definition: ArrayRef.h:232
static SDValue lowerV8I16VectorShuffle ( SDValue  Op,
SDValue  V1,
SDValue  V2,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Generic lowering of 8-lane i16 shuffles.

This handles both single-input shuffles and combined shuffle/blends with two inputs. The single input shuffles are immediately delegated to a dedicated lowering routine.

The blends are lowered in one of three fundamental ways. If there are few enough inputs, it delegates to a basic UNPCK-based strategy. If the shuffle of the input is significantly cheaper when lowered as an interleaving of the two inputs, try to interleave them. Otherwise, blend the low and high halves of the inputs separately (making them have relatively few inputs) and then concatenate them.

Definition at line 7591 of file X86ISelLowering.cpp.

7593  {
7594  SDLoc DL(Op);
7595  assert(Op.getSimpleValueType() == MVT::v8i16 && "Bad shuffle type!");
7596  assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
7597  assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
7598  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
7599  ArrayRef<int> OrigMask = SVOp->getMask();
7600  int MaskStorage[8] = {OrigMask[0], OrigMask[1], OrigMask[2], OrigMask[3],
7601  OrigMask[4], OrigMask[5], OrigMask[6], OrigMask[7]};
7602  MutableArrayRef<int> Mask(MaskStorage);
7603 
7604  assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
7605 
7606  auto isV1 = [](int M) { return M >= 0 && M < 8; };
7607  auto isV2 = [](int M) { return M >= 8; };
7608 
7609  int NumV1Inputs = std::count_if(Mask.begin(), Mask.end(), isV1);
7610  int NumV2Inputs = std::count_if(Mask.begin(), Mask.end(), isV2);
7611 
7612  if (NumV2Inputs == 0)
7613  return lowerV8I16SingleInputVectorShuffle(DL, V1, Mask, Subtarget, DAG);
7614 
7615  assert(NumV1Inputs > 0 && "All single-input shuffles should be canonicalized "
7616  "to be V1-input shuffles.");
7617 
7618  if (NumV1Inputs + NumV2Inputs <= 4)
7619  return lowerV8I16BasicBlendVectorShuffle(DL, V1, V2, Mask, Subtarget, DAG);
7620 
7621  // Check whether an interleaving lowering is likely to be more efficient.
7622  // This isn't perfect but it is a strong heuristic that tends to work well on
7623  // the kinds of shuffles that show up in practice.
7624  //
7625  // FIXME: Handle 1x, 2x, and 4x interleaving.
7626  if (shouldLowerAsInterleaving(Mask)) {
7627  // FIXME: Figure out whether we should pack these into the low or high
7628  // halves.
7629 
7630  int EMask[8], OMask[8];
7631  for (int i = 0; i < 4; ++i) {
7632  EMask[i] = Mask[2*i];
7633  OMask[i] = Mask[2*i + 1];
7634  EMask[i + 4] = -1;
7635  OMask[i + 4] = -1;
7636  }
7637 
7638  SDValue Evens = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, EMask);
7639  SDValue Odds = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, OMask);
7640 
7641  return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, Evens, Odds);
7642  }
7643 
7644  int LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
7645  int HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
7646 
7647  for (int i = 0; i < 4; ++i) {
7648  LoBlendMask[i] = Mask[i];
7649  HiBlendMask[i] = Mask[i + 4];
7650  }
7651 
7652  SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, LoBlendMask);
7653  SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, HiBlendMask);
7654  LoV = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, LoV);
7655  HiV = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, HiV);
7656 
7657  return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
7658  DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, LoV, HiV));
7659 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
assert(Globals.size() > 1)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static bool shouldLowerAsInterleaving(ArrayRef< int > Mask)
Detect whether the mask pattern should be lowered through interleaving.
static SDValue lowerV8I16SingleInputVectorShuffle(SDLoc DL, SDValue V, MutableArrayRef< int > Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG)
Lowering of single-input v8i16 shuffles is the cornerstone of SSE2 shuffle lowering, and the most complex part.
static SDValue lowerV8I16BasicBlendVectorShuffle(SDLoc DL, SDValue V1, SDValue V2, MutableArrayRef< int > Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG)
Blend two v8i16 vectors using a naive unpack strategy.
/// \brief Lower an ISD::VACOPY node for x86-64.
///
/// The x86-64 va_list is a struct { i32, i32, i8*, i8* } — 24 bytes with
/// 8-byte alignment — so va_copy is a plain fixed-size memcpy between the
/// two va_list slots.
static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget,
                           SelectionDAG &DAG) {
  // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
  assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!");
  SDValue Chain = Op.getOperand(0);
  SDValue DstPtr = Op.getOperand(1);
  SDValue SrcPtr = Op.getOperand(2);
  // Operands 3 and 4 carry the IR-level pointer values for alias info.
  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  SDLoc DL(Op);

  // 24 bytes = sizeof(va_list struct); alignment 8 = its natural alignment.
  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr,
                       DAG.getIntPtrConstant(24), 8, /*isVolatile*/ false,
                       /*AlwaysInline*/ false,
                       MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Definition: X86Subtarget.h:283
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
SDValue getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
LLVM Value Representation.
Definition: Value.h:69
static SDValue LowerVECTOR_SHUFFLE_128v4 ( ShuffleVectorSDNode SVOp,
SelectionDAG DAG 
)
static

LowerVECTOR_SHUFFLE_128v4 - Handle all 128-bit wide vectors with 4 elements, and match them with several different shuffle types.

Definition at line 8753 of file X86ISelLowering.cpp.

8753  {
8754  SDValue V1 = SVOp->getOperand(0);
8755  SDValue V2 = SVOp->getOperand(1);
8756  SDLoc dl(SVOp);
8757  MVT VT = SVOp->getSimpleValueType(0);
8758 
8759  assert(VT.is128BitVector() && "Unsupported vector size");
8760 
8761  std::pair<int, int> Locs[4];
8762  int Mask1[] = { -1, -1, -1, -1 };
8763  SmallVector<int, 8> PermMask(SVOp->getMask().begin(), SVOp->getMask().end());
8764 
8765  unsigned NumHi = 0;
8766  unsigned NumLo = 0;
8767  for (unsigned i = 0; i != 4; ++i) {
8768  int Idx = PermMask[i];
8769  if (Idx < 0) {
8770  Locs[i] = std::make_pair(-1, -1);
8771  } else {
8772  assert(Idx < 8 && "Invalid VECTOR_SHUFFLE index!");
8773  if (Idx < 4) {
8774  Locs[i] = std::make_pair(0, NumLo);
8775  Mask1[NumLo] = Idx;
8776  NumLo++;
8777  } else {
8778  Locs[i] = std::make_pair(1, NumHi);
8779  if (2+NumHi < 4)
8780  Mask1[2+NumHi] = Idx;
8781  NumHi++;
8782  }
8783  }
8784  }
8785 
8786  if (NumLo <= 2 && NumHi <= 2) {
8787  // If no more than two elements come from either vector. This can be
8788  // implemented with two shuffles. First shuffle gather the elements.
8789  // The second shuffle, which takes the first shuffle as both of its
8790  // vector operands, put the elements into the right order.
8791  V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
8792 
8793  int Mask2[] = { -1, -1, -1, -1 };
8794 
8795  for (unsigned i = 0; i != 4; ++i)
8796  if (Locs[i].first != -1) {
8797  unsigned Idx = (i < 2) ? 0 : 4;
8798  Idx += Locs[i].first * 2 + Locs[i].second;
8799  Mask2[i] = Idx;
8800  }
8801 
8802  return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]);
8803  }
8804 
8805  if (NumLo == 3 || NumHi == 3) {
8806  // Otherwise, we must have three elements from one vector, call it X, and
8807  // one element from the other, call it Y. First, use a shufps to build an
8808  // intermediate vector with the one element from Y and the element from X
8809  // that will be in the same half in the final destination (the indexes don't
8810  // matter). Then, use a shufps to build the final vector, taking the half
8811  // containing the element from Y from the intermediate, and the other half
8812  // from X.
8813  if (NumHi == 3) {
8814  // Normalize it so the 3 elements come from V1.
8815  CommuteVectorShuffleMask(PermMask, 4);
8816  std::swap(V1, V2);
8817  }
8818 
8819  // Find the element from V2.
8820  unsigned HiIndex;
8821  for (HiIndex = 0; HiIndex < 3; ++HiIndex) {
8822  int Val = PermMask[HiIndex];
8823  if (Val < 0)
8824  continue;
8825  if (Val >= 4)
8826  break;
8827  }
8828 
8829  Mask1[0] = PermMask[HiIndex];
8830  Mask1[1] = -1;
8831  Mask1[2] = PermMask[HiIndex^1];
8832  Mask1[3] = -1;
8833  V2 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
8834 
8835  if (HiIndex >= 2) {
8836  Mask1[0] = PermMask[0];
8837  Mask1[1] = PermMask[1];
8838  Mask1[2] = HiIndex & 1 ? 6 : 4;
8839  Mask1[3] = HiIndex & 1 ? 4 : 6;
8840  return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
8841  }
8842 
8843  Mask1[0] = HiIndex & 1 ? 2 : 0;
8844  Mask1[1] = HiIndex & 1 ? 0 : 2;
8845  Mask1[2] = PermMask[2];
8846  Mask1[3] = PermMask[3];
8847  if (Mask1[2] >= 0)
8848  Mask1[2] += 4;
8849  if (Mask1[3] >= 0)
8850  Mask1[3] += 4;
8851  return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]);
8852  }
8853 
8854  // Break it into (shuffle shuffle_hi, shuffle_lo).
8855  int LoMask[] = { -1, -1, -1, -1 };
8856  int HiMask[] = { -1, -1, -1, -1 };
8857 
8858  int *MaskPtr = LoMask;
8859  unsigned MaskIdx = 0;
8860  unsigned LoIdx = 0;
8861  unsigned HiIdx = 2;
8862  for (unsigned i = 0; i != 4; ++i) {
8863  if (i == 2) {
8864  MaskPtr = HiMask;
8865  MaskIdx = 1;
8866  LoIdx = 0;
8867  HiIdx = 2;
8868  }
8869  int Idx = PermMask[i];
8870  if (Idx < 0) {
8871  Locs[i] = std::make_pair(-1, -1);
8872  } else if (Idx < 4) {
8873  Locs[i] = std::make_pair(MaskIdx, LoIdx);
8874  MaskPtr[LoIdx] = Idx;
8875  LoIdx++;
8876  } else {
8877  Locs[i] = std::make_pair(MaskIdx, HiIdx);
8878  MaskPtr[HiIdx] = Idx;
8879  HiIdx++;
8880  }
8881  }
8882 
8883  SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]);
8884  SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]);
8885  int MaskOps[] = { -1, -1, -1, -1 };
8886  for (unsigned i = 0; i != 4; ++i)
8887  if (Locs[i].first != -1)
8888  MaskOps[i] = Locs[i].first * 4 + Locs[i].second;
8889  return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
8890 }
static void CommuteVectorShuffleMask(SmallVectorImpl< int > &Mask, unsigned NumElems)
const SDValue & getOperand(unsigned Num) const
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
assert(Globals.size() > 1)
ArrayRef< int > getMask() const
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:590
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
MVT getSimpleValueType(unsigned ResNo) const
static SDValue LowerVECTOR_SHUFFLE_256 ( ShuffleVectorSDNode SVOp,
SelectionDAG DAG 
)
static

LowerVECTOR_SHUFFLE_256 - Handle all 256-bit wide vectors shuffles which could not be matched by any known target speficic shuffle

Definition at line 8640 of file X86ISelLowering.cpp.

8640  {
8641 
8642  SDValue NewOp = Compact8x32ShuffleNode(SVOp, DAG);
8643  if (NewOp.getNode())
8644  return NewOp;
8645 
8646  MVT VT = SVOp->getSimpleValueType(0);
8647 
8648  unsigned NumElems = VT.getVectorNumElements();
8649  unsigned NumLaneElems = NumElems / 2;
8650 
8651  SDLoc dl(SVOp);
8652  MVT EltVT = VT.getVectorElementType();
8653  MVT NVT = MVT::getVectorVT(EltVT, NumLaneElems);
8654  SDValue Output[2];
8655 
8656  SmallVector<int, 16> Mask;
8657  for (unsigned l = 0; l < 2; ++l) {
8658  // Build a shuffle mask for the output, discovering on the fly which
8659  // input vectors to use as shuffle operands (recorded in InputUsed).
8660  // If building a suitable shuffle vector proves too hard, then bail
8661  // out with UseBuildVector set.
8662  bool UseBuildVector = false;
8663  int InputUsed[2] = { -1, -1 }; // Not yet discovered.
8664  unsigned LaneStart = l * NumLaneElems;
8665  for (unsigned i = 0; i != NumLaneElems; ++i) {
8666  // The mask element. This indexes into the input.
8667  int Idx = SVOp->getMaskElt(i+LaneStart);
8668  if (Idx < 0) {
8669  // the mask element does not index into any input vector.
8670  Mask.push_back(-1);
8671  continue;
8672  }
8673 
8674  // The input vector this mask element indexes into.
8675  int Input = Idx / NumLaneElems;
8676 
8677  // Turn the index into an offset from the start of the input vector.
8678  Idx -= Input * NumLaneElems;
8679 
8680  // Find or create a shuffle vector operand to hold this input.
8681  unsigned OpNo;
8682  for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
8683  if (InputUsed[OpNo] == Input)
8684  // This input vector is already an operand.
8685  break;
8686  if (InputUsed[OpNo] < 0) {
8687  // Create a new operand for this input vector.
8688  InputUsed[OpNo] = Input;
8689  break;
8690  }
8691  }
8692 
8693  if (OpNo >= array_lengthof(InputUsed)) {
8694  // More than two input vectors used! Give up on trying to create a
8695  // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
8696  UseBuildVector = true;
8697  break;
8698  }
8699 
8700  // Add the mask index for the new shuffle vector.
8701  Mask.push_back(Idx + OpNo * NumLaneElems);
8702  }
8703 
8704  if (UseBuildVector) {
8706  for (unsigned i = 0; i != NumLaneElems; ++i) {
8707  // The mask element. This indexes into the input.
8708  int Idx = SVOp->getMaskElt(i+LaneStart);
8709  if (Idx < 0) {
8710  SVOps.push_back(DAG.getUNDEF(EltVT));
8711  continue;
8712  }
8713 
8714  // The input vector this mask element indexes into.
8715  int Input = Idx / NumElems;
8716 
8717  // Turn the index into an offset from the start of the input vector.
8718  Idx -= Input * NumElems;
8719 
8720  // Extract the vector element by hand.
8721  SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
8722  SVOp->getOperand(Input),
8723  DAG.getIntPtrConstant(Idx)));
8724  }
8725 
8726  // Construct the output using a BUILD_VECTOR.
8727  Output[l] = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, SVOps);
8728  } else if (InputUsed[0] < 0) {
8729  // No input vectors were used! The result is undefined.
8730  Output[l] = DAG.getUNDEF(NVT);
8731  } else {
8732  SDValue Op0 = Extract128BitVector(SVOp->getOperand(InputUsed[0] / 2),
8733  (InputUsed[0] % 2) * NumLaneElems,
8734  DAG, dl);
8735  // If only one input was used, use an undefined vector for the other.
8736  SDValue Op1 = (InputUsed[1] < 0) ? DAG.getUNDEF(NVT) :
8737  Extract128BitVector(SVOp->getOperand(InputUsed[1] / 2),
8738  (InputUsed[1] % 2) * NumLaneElems, DAG, dl);
8739  // At least one input vector was used. Create a new shuffle vector.
8740  Output[l] = DAG.getVectorShuffle(NVT, dl, Op0, Op1, &Mask[0]);
8741  }
8742 
8743  Mask.clear();
8744  }
8745 
8746  // Concatenate the result back
8747  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Output[0], Output[1]);
8748 }
const SDValue & getOperand(unsigned Num) const
int getMaskElt(unsigned Idx) const
static SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG)
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
LLVM_CONSTEXPR size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:295
SDNode * getNode() const
get the SDNode which holds the desired result
unsigned getVectorNumElements() const
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
static cl::opt< std::string > Input(cl::Positional, cl::desc("<input>"), cl::init("-"))
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
MVT getVectorElementType() const
MVT getSimpleValueType(unsigned ResNo) const
/// \brief Lower a VECTOR_SHUFFLE whose mask has already been matched as a
/// blend (MaskValue is the blend immediate) to an X86ISD::BLENDI node.
static SDValue LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
                                          unsigned MaskValue,
                                          const X86Subtarget *Subtarget,
                                          SelectionDAG &DAG) {
  MVT VT = SVOp->getSimpleValueType(0);
  MVT EltVT = VT.getVectorElementType();
  // BUGFIX: the failure message used to be &&-ed into isBlendMask's hasInt256
  // argument instead of the assert condition; the condition value was the
  // same (string literals are truthy) but the message never attached.
  assert(isBlendMask(SVOp->getMask(), VT, Subtarget->hasSSE41(),
                     Subtarget->hasInt256()) &&
         "Trying to lower a VECTOR_SHUFFLE to a Blend but with the wrong mask");
  SDValue V1 = SVOp->getOperand(0);
  SDValue V2 = SVOp->getOperand(1);
  SDLoc dl(SVOp);
  unsigned NumElems = VT.getVectorNumElements();

  // Convert i32 vectors to floating point if it is not AVX2.
  // AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors.
  MVT BlendVT = VT;
  if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
    BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()),
                               NumElems);
    // BUGFIX(review): these bitcasts used VT (a no-op, leaving the BLENDI
    // operands as integer vectors); cast to BlendVT as the comment above
    // intends — confirm against upstream history.
    V1 = DAG.getNode(ISD::BITCAST, dl, BlendVT, V1);
    V2 = DAG.getNode(ISD::BITCAST, dl, BlendVT, V2);
  }

  SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2,
                            DAG.getConstant(MaskValue, MVT::i32));
  // Cast the blend result back to the caller's expected type.
  return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
}
unsigned getSizeInBits() const
bool hasSSE41() const
Definition: X86Subtarget.h:315
const SDValue & getOperand(unsigned Num) const
static bool isBlendMask(ArrayRef< int > MaskVals, MVT VT, bool hasSSE41, bool hasInt256, unsigned *MaskOut=nullptr)
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
BLENDI - Blend where the selector is an immediate.
ArrayRef< int > getMask() const
bool hasInt256() const
Definition: X86Subtarget.h:321
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
MVT getVectorElementType() const
MVT getSimpleValueType(unsigned ResNo) const
static SDValue LowerVECTOR_SHUFFLEv16i16 ( SDValue  Op,
SelectionDAG DAG 
)
static

v16i16 shuffles

FIXME: We only support generation of a single pshufb currently. We can generalize the other applicable cases from LowerVECTOR_SHUFFLEv8i16 as well (e.g 2 x pshufb + 1 x por).

Definition at line 8382 of file X86ISelLowering.cpp.

8382  {
8383  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
8384  SDValue V1 = SVOp->getOperand(0);
8385  SDValue V2 = SVOp->getOperand(1);
8386  SDLoc dl(SVOp);
8387 
8388  if (V2.getOpcode() != ISD::UNDEF)
8389  return SDValue();
8390 
8391  SmallVector<int, 16> MaskVals(SVOp->getMask().begin(), SVOp->getMask().end());
8392  return getPSHUFB(MaskVals, V1, dl, DAG);
8393 }
const SDValue & getOperand(unsigned Num) const
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
unsigned getOpcode() const
static SDValue getPSHUFB(ArrayRef< int > MaskVals, SDValue V1, SDLoc &dl, SelectionDAG &DAG)
ArrayRef< int > getMask() const
static SDValue LowerVECTOR_SHUFFLEv16i8 ( ShuffleVectorSDNode SVOp,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Definition at line 8399 of file X86ISelLowering.cpp.

8401  {
8402  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8403  SDValue V1 = SVOp->getOperand(0);
8404  SDValue V2 = SVOp->getOperand(1);
8405  SDLoc dl(SVOp);
8406  ArrayRef<int> MaskVals = SVOp->getMask();
8407 
8408  // Promote splats to a larger type which usually leads to more efficient code.
8409  // FIXME: Is this true if pshufb is available?
8410  if (SVOp->isSplat())
8411  return PromoteSplat(SVOp, DAG);
8412 
8413  // If we have SSSE3, case 1 is generated when all result bytes come from
8414  // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
8415  // present, fall back to case 3.
8416 
8417  // If SSSE3, use 1 pshufb instruction per vector with elements in the result.
8418  if (Subtarget->hasSSSE3()) {
8419  SmallVector<SDValue,16> pshufbMask;
8420 
8421  // If all result elements are from one input vector, then only translate
8422  // undef mask values to 0x80 (zero out result) in the pshufb mask.
8423  //
8424  // Otherwise, we have elements from both input vectors, and must zero out
8425  // elements that come from V2 in the first mask, and V1 in the second mask
8426  // so that we can OR them together.
8427  for (unsigned i = 0; i != 16; ++i) {
8428  int EltIdx = MaskVals[i];
8429  if (EltIdx < 0 || EltIdx >= 16)
8430  EltIdx = 0x80;
8431  pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
8432  }
8433  V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
8434  DAG.getNode(ISD::BUILD_VECTOR, dl,
8435  MVT::v16i8, pshufbMask));
8436 
8437  // As PSHUFB will zero elements with negative indices, it's safe to ignore
8438  // the 2nd operand if it's undefined or zero.
8439  if (V2.getOpcode() == ISD::UNDEF ||
8440  ISD::isBuildVectorAllZeros(V2.getNode()))
8441  return V1;
8442 
8443  // Calculate the shuffle mask for the second input, shuffle it, and
8444  // OR it with the first shuffled input.
8445  pshufbMask.clear();
8446  for (unsigned i = 0; i != 16; ++i) {
8447  int EltIdx = MaskVals[i];
8448  EltIdx = (EltIdx < 16) ? 0x80 : EltIdx - 16;
8449  pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
8450  }
8451  V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
8452  DAG.getNode(ISD::BUILD_VECTOR, dl,
8453  MVT::v16i8, pshufbMask));
8454  return DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
8455  }
8456 
8457  // No SSSE3 - Calculate in place words and then fix all out of place words
8458  // With 0-16 extracts & inserts. Worst case is 16 bytes out of order from
8459  // the 16 different words that comprise the two doublequadword input vectors.
8460  V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
8461  V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
8462  SDValue NewV = V1;
8463  for (int i = 0; i != 8; ++i) {
8464  int Elt0 = MaskVals[i*2];
8465  int Elt1 = MaskVals[i*2+1];
8466 
8467  // This word of the result is all undef, skip it.
8468  if (Elt0 < 0 && Elt1 < 0)
8469  continue;
8470 
8471  // This word of the result is already in the correct place, skip it.
8472  if ((Elt0 == i*2) && (Elt1 == i*2+1))
8473  continue;
8474 
8475  SDValue Elt0Src = Elt0 < 16 ? V1 : V2;
8476  SDValue Elt1Src = Elt1 < 16 ? V1 : V2;
8477  SDValue InsElt;
8478 
8479  // If Elt0 and Elt1 are defined, are consecutive, and can be load
8480  // using a single extract together, load it and store it.
8481  if ((Elt0 >= 0) && ((Elt0 + 1) == Elt1) && ((Elt0 & 1) == 0)) {
8482  InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
8483  DAG.getIntPtrConstant(Elt1 / 2));
8484  NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
8485  DAG.getIntPtrConstant(i));
8486  continue;
8487  }
8488 
8489  // If Elt1 is defined, extract it from the appropriate source. If the
8490  // source byte is not also odd, shift the extracted word left 8 bits
8491  // otherwise clear the bottom 8 bits if we need to do an or.
8492  if (Elt1 >= 0) {
8493  InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
8494  DAG.getIntPtrConstant(Elt1 / 2));
8495  if ((Elt1 & 1) == 0)
8496  InsElt = DAG.getNode(ISD::SHL, dl, MVT::i16, InsElt,
8497  DAG.getConstant(8,
8498  TLI.getShiftAmountTy(InsElt.getValueType())));
8499  else if (Elt0 >= 0)
8500  InsElt = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt,
8501  DAG.getConstant(0xFF00, MVT::i16));
8502  }
8503  // If Elt0 is defined, extract it from the appropriate source. If the
8504  // source byte is not also even, shift the extracted word right 8 bits. If
8505  // Elt1 was also defined, OR the extracted values together before
8506  // inserting them in the result.
8507  if (Elt0 >= 0) {
8508  SDValue InsElt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
8509  Elt0Src, DAG.getIntPtrConstant(Elt0 / 2));
8510  if ((Elt0 & 1) != 0)
8511  InsElt0 = DAG.getNode(ISD::SRL, dl, MVT::i16, InsElt0,
8512  DAG.getConstant(8,
8513  TLI.getShiftAmountTy(InsElt0.getValueType())));
8514  else if (Elt1 >= 0)
8515  InsElt0 = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt0,
8516  DAG.getConstant(0x00FF, MVT::i16));
8517  InsElt = Elt1 >= 0 ? DAG.getNode(ISD::OR, dl, MVT::i16, InsElt, InsElt0)
8518  : InsElt0;
8519  }
8520  NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
8521  DAG.getIntPtrConstant(i));
8522  }
8523  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV);
8524 }
PSHUFB - Shuffle 16 8-bit values within a vector.
const SDValue & getOperand(unsigned Num) const
EVT getShiftAmountTy(EVT LHSTy) const
bool isBuildVectorAllZeros(const SDNode *N)
bool hasSSSE3() const
Definition: X86Subtarget.h:314
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
SDNode * getNode() const
get the SDNode which holds the desired result
static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG)
PromoteSplat - Splat is promoted to target supported vector shuffles.
unsigned getOpcode() const
ArrayRef< int > getMask() const
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
EVT getValueType() const
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
static SDValue LowerVECTOR_SHUFFLEv32i8 ( ShuffleVectorSDNode SVOp,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Definition at line 8528 of file X86ISelLowering.cpp.

8530  {
8531  MVT VT = SVOp->getSimpleValueType(0);
8532  SDValue V1 = SVOp->getOperand(0);
8533  SDValue V2 = SVOp->getOperand(1);
8534  SDLoc dl(SVOp);
8535  SmallVector<int, 32> MaskVals(SVOp->getMask().begin(), SVOp->getMask().end());
8536 
8537  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
8538  bool V1IsAllZero = ISD::isBuildVectorAllZeros(V1.getNode());
8539  bool V2IsAllZero = ISD::isBuildVectorAllZeros(V2.getNode());
8540 
8541  // VPSHUFB may be generated if
8542  // (1) one of input vector is undefined or zeroinitializer.
8543  // The mask value 0x80 puts 0 in the corresponding slot of the vector.
8544  // And (2) the mask indexes don't cross the 128-bit lane.
8545  if (VT != MVT::v32i8 || !Subtarget->hasInt256() ||
8546  (!V2IsUndef && !V2IsAllZero && !V1IsAllZero))
8547  return SDValue();
8548 
8549  if (V1IsAllZero && !V2IsAllZero) {
8550  CommuteVectorShuffleMask(MaskVals, 32);
8551  V1 = V2;
8552  }
8553  return getPSHUFB(MaskVals, V1, dl, DAG);
8554 }
static void CommuteVectorShuffleMask(SmallVectorImpl< int > &Mask, unsigned NumElems)
const SDValue & getOperand(unsigned Num) const
bool isBuildVectorAllZeros(const SDNode *N)
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
SDNode * getNode() const
get the SDNode which holds the desired result
unsigned getOpcode() const
static SDValue getPSHUFB(ArrayRef< int > MaskVals, SDValue V1, SDLoc &dl, SelectionDAG &DAG)
ArrayRef< int > getMask() const
bool hasInt256() const
Definition: X86Subtarget.h:321
MVT getSimpleValueType(unsigned ResNo) const
static SDValue LowerVECTOR_SHUFFLEv8i16 ( SDValue  Op,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Definition at line 8142 of file X86ISelLowering.cpp.

8143  {
8144  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
8145  SDValue V1 = SVOp->getOperand(0);
8146  SDValue V2 = SVOp->getOperand(1);
8147  SDLoc dl(SVOp);
8148  SmallVector<int, 8> MaskVals;
8149 
8150  // Determine if more than 1 of the words in each of the low and high quadwords
8151  // of the result come from the same quadword of one of the two inputs. Undef
8152  // mask values count as coming from any quadword, for better codegen.
8153  //
8154  // Lo/HiQuad[i] = j indicates how many words from the ith quad of the input
8155  // feeds this quad. For i, 0 and 1 refer to V1, 2 and 3 refer to V2.
8156  unsigned LoQuad[] = { 0, 0, 0, 0 };
8157  unsigned HiQuad[] = { 0, 0, 0, 0 };
8158  // Indices of quads used.
8159  std::bitset<4> InputQuads;
8160  for (unsigned i = 0; i < 8; ++i) {
8161  unsigned *Quad = i < 4 ? LoQuad : HiQuad;
8162  int EltIdx = SVOp->getMaskElt(i);
8163  MaskVals.push_back(EltIdx);
8164  if (EltIdx < 0) {
8165  ++Quad[0];
8166  ++Quad[1];
8167  ++Quad[2];
8168  ++Quad[3];
8169  continue;
8170  }
8171  ++Quad[EltIdx / 4];
8172  InputQuads.set(EltIdx / 4);
8173  }
8174 
8175  int BestLoQuad = -1;
8176  unsigned MaxQuad = 1;
8177  for (unsigned i = 0; i < 4; ++i) {
8178  if (LoQuad[i] > MaxQuad) {
8179  BestLoQuad = i;
8180  MaxQuad = LoQuad[i];
8181  }
8182  }
8183 
8184  int BestHiQuad = -1;
8185  MaxQuad = 1;
8186  for (unsigned i = 0; i < 4; ++i) {
8187  if (HiQuad[i] > MaxQuad) {
8188  BestHiQuad = i;
8189  MaxQuad = HiQuad[i];
8190  }
8191  }
8192 
8193  // For SSSE3, If all 8 words of the result come from only 1 quadword of each
8194  // of the two input vectors, shuffle them into one input vector so only a
8195  // single pshufb instruction is necessary. If there are more than 2 input
8196  // quads, disable the next transformation since it does not help SSSE3.
8197  bool V1Used = InputQuads[0] || InputQuads[1];
8198  bool V2Used = InputQuads[2] || InputQuads[3];
8199  if (Subtarget->hasSSSE3()) {
8200  if (InputQuads.count() == 2 && V1Used && V2Used) {
8201  BestLoQuad = InputQuads[0] ? 0 : 1;
8202  BestHiQuad = InputQuads[2] ? 2 : 3;
8203  }
8204  if (InputQuads.count() > 2) {
8205  BestLoQuad = -1;
8206  BestHiQuad = -1;
8207  }
8208  }
8209 
8210  // If BestLoQuad or BestHiQuad are set, shuffle the quads together and update
8211  // the shuffle mask. If a quad is scored as -1, that means that it contains
8212  // words from all 4 input quadwords.
8213  SDValue NewV;
8214  if (BestLoQuad >= 0 || BestHiQuad >= 0) {
8215  int MaskV[] = {
8216  BestLoQuad < 0 ? 0 : BestLoQuad,
8217  BestHiQuad < 0 ? 1 : BestHiQuad
8218  };
8219  NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
8220  DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1),
8221  DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2), &MaskV[0]);
8222  NewV = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, NewV);
8223 
8224  // Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the
8225  // source words for the shuffle, to aid later transformations.
8226  bool AllWordsInNewV = true;
8227  bool InOrder[2] = { true, true };
8228  for (unsigned i = 0; i != 8; ++i) {
8229  int idx = MaskVals[i];
8230  if (idx != (int)i)
8231  InOrder[i/4] = false;
8232  if (idx < 0 || (idx/4) == BestLoQuad || (idx/4) == BestHiQuad)
8233  continue;
8234  AllWordsInNewV = false;
8235  break;
8236  }
8237 
8238  bool pshuflw = AllWordsInNewV, pshufhw = AllWordsInNewV;
8239  if (AllWordsInNewV) {
8240  for (int i = 0; i != 8; ++i) {
8241  int idx = MaskVals[i];
8242  if (idx < 0)
8243  continue;
8244  idx = MaskVals[i] = (idx / 4) == BestLoQuad ? (idx & 3) : (idx & 3) + 4;
8245  if ((idx != i) && idx < 4)
8246  pshufhw = false;
8247  if ((idx != i) && idx > 3)
8248  pshuflw = false;
8249  }
8250  V1 = NewV;
8251  V2Used = false;
8252  BestLoQuad = 0;
8253  BestHiQuad = 1;
8254  }
8255 
8256  // If we've eliminated the use of V2, and the new mask is a pshuflw or
8257  // pshufhw, that's as cheap as it gets. Return the new shuffle.
8258  if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) {
8259  unsigned Opc = pshufhw ? X86ISD::PSHUFHW : X86ISD::PSHUFLW;
8260  unsigned TargetMask = 0;
8261  NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
8262  DAG.getUNDEF(MVT::v8i16), &MaskVals[0]);
8263  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
8264  TargetMask = pshufhw ? getShufflePSHUFHWImmediate(SVOp):
8265  getShufflePSHUFLWImmediate(SVOp);
8266  V1 = NewV.getOperand(0);
8267  return getTargetShuffleNode(Opc, dl, MVT::v8i16, V1, TargetMask, DAG);
8268  }
8269  }
8270 
8271  // Promote splats to a larger type which usually leads to more efficient code.
8272  // FIXME: Is this true if pshufb is available?
8273  if (SVOp->isSplat())
8274  return PromoteSplat(SVOp, DAG);
8275 
8276  // If we have SSSE3, and all words of the result are from 1 input vector,
8277  // case 2 is generated, otherwise case 3 is generated. If no SSSE3
8278  // is present, fall back to case 4.
8279  if (Subtarget->hasSSSE3()) {
8280  SmallVector<SDValue,16> pshufbMask;
8281 
8282  // If we have elements from both input vectors, set the high bit of the
8283  // shuffle mask element to zero out elements that come from V2 in the V1
8284  // mask, and elements that come from V1 in the V2 mask, so that the two
8285  // results can be OR'd together.
8286  bool TwoInputs = V1Used && V2Used;
8287  V1 = getPSHUFB(MaskVals, V1, dl, DAG);
8288  if (!TwoInputs)
8289  return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
8290 
8291  // Calculate the shuffle mask for the second input, shuffle it, and
8292  // OR it with the first shuffled input.
8293  CommuteVectorShuffleMask(MaskVals, 8);
8294  V2 = getPSHUFB(MaskVals, V2, dl, DAG);
8295  V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
8296  return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
8297  }
8298 
8299  // If BestLoQuad >= 0, generate a pshuflw to put the low elements in order,
8300  // and update MaskVals with new element order.
8301  std::bitset<8> InOrder;
8302  if (BestLoQuad >= 0) {
8303  int MaskV[] = { -1, -1, -1, -1, 4, 5, 6, 7 };
8304  for (int i = 0; i != 4; ++i) {
8305  int idx = MaskVals[i];
8306  if (idx < 0) {
8307  InOrder.set(i);
8308  } else if ((idx / 4) == BestLoQuad) {
8309  MaskV[i] = idx & 3;
8310  InOrder.set(i);
8311  }
8312  }
8313  NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
8314  &MaskV[0]);
8315 
8316  if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSE2()) {
8317  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
8318  NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16,
8319  NewV.getOperand(0),
8320  getShufflePSHUFLWImmediate(SVOp), DAG);
8321  }
8322  }
8323 
8324  // If BestHi >= 0, generate a pshufhw to put the high elements in order,
8325  // and update MaskVals with the new element order.
8326  if (BestHiQuad >= 0) {
8327  int MaskV[] = { 0, 1, 2, 3, -1, -1, -1, -1 };
8328  for (unsigned i = 4; i != 8; ++i) {
8329  int idx = MaskVals[i];
8330  if (idx < 0) {
8331  InOrder.set(i);
8332  } else if ((idx / 4) == BestHiQuad) {
8333  MaskV[i] = (idx & 3) + 4;
8334  InOrder.set(i);
8335  }
8336  }
8337  NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
8338  &MaskV[0]);
8339 
8340  if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSE2()) {
8341  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
8342  NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16,
8343  NewV.getOperand(0),
8344  getShufflePSHUFHWImmediate(SVOp), DAG);
8345  }
8346  }
8347 
8348  // In case BestHi & BestLo were both -1, which means each quadword has a word
8349  // from each of the four input quadwords, calculate the InOrder bitvector now
8350  // before falling through to the insert/extract cleanup.
8351  if (BestLoQuad == -1 && BestHiQuad == -1) {
8352  NewV = V1;
8353  for (int i = 0; i != 8; ++i)
8354  if (MaskVals[i] < 0 || MaskVals[i] == i)
8355  InOrder.set(i);
8356  }
8357 
8358  // The other elements are put in the right place using pextrw and pinsrw.
8359  for (unsigned i = 0; i != 8; ++i) {
8360  if (InOrder[i])
8361  continue;
8362  int EltIdx = MaskVals[i];
8363  if (EltIdx < 0)
8364  continue;
8365  SDValue ExtOp = (EltIdx < 8) ?
8366  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V1,
8367  DAG.getIntPtrConstant(EltIdx)) :
8368  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V2,
8369  DAG.getIntPtrConstant(EltIdx - 8));
8370  NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, ExtOp,
8371  DAG.getIntPtrConstant(i));
8372  }
8373  return NewV;
8374 }
static void CommuteVectorShuffleMask(SmallVectorImpl< int > &Mask, unsigned NumElems)
const SDValue & getOperand(unsigned Num) const
static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, SDValue V1, SelectionDAG &DAG)
static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N)
int getMaskElt(unsigned Idx) const
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
bool hasSSSE3() const
Definition: X86Subtarget.h:314
static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N)
bool hasSSE2() const
Definition: X86Subtarget.h:312
SDNode * getNode() const
get the SDNode which holds the desired result
const SDValue & getOperand(unsigned i) const
static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG)
PromoteSplat - Splat is promoted to target supported vector shuffles.
unsigned getOpcode() const
static SDValue getPSHUFB(ArrayRef< int > MaskVals, SDValue V1, SDLoc &dl, SelectionDAG &DAG)
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static SDValue LowerVectorAllZeroTest ( SDValue  Op,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Definition at line 11542 of file X86ISelLowering.cpp.

11543  {
11544  assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
11545 
11546  if (!Subtarget->hasSSE41())
11547  return SDValue();
11548 
11549  if (!Op->hasOneUse())
11550  return SDValue();
11551 
11552  SDNode *N = Op.getNode();
11553  SDLoc DL(N);
11554 
11555  SmallVector<SDValue, 8> Opnds;
11556  DenseMap<SDValue, unsigned> VecInMap;
11557  SmallVector<SDValue, 8> VecIns;
11558  EVT VT = MVT::Other;
11559 
11560  // Recognize a special case where a vector is casted into wide integer to
11561  // test all 0s.
11562  Opnds.push_back(N->getOperand(0));
11563  Opnds.push_back(N->getOperand(1));
11564 
11565  for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) {
11566  SDValue I = Opnds[Slot];
11567  // BFS traverse all OR'd operands.
11568  if (I->getOpcode() == ISD::OR) {
11569  Opnds.push_back(I->getOperand(0));
11570  Opnds.push_back(I->getOperand(1));
11571  // Re-evaluate the number of nodes to be traversed.
11572  e += 2; // 2 more nodes (LHS and RHS) are pushed.
11573  continue;
11574  }
11575 
11576  // Quit if a non-EXTRACT_VECTOR_ELT
11577  if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
11578  return SDValue();
11579 
11580  // Quit if without a constant index.
11581  SDValue Idx = I->getOperand(1);
11582  if (!isa<ConstantSDNode>(Idx))
11583  return SDValue();
11584 
11585  SDValue ExtractedFromVec = I->getOperand(0);
11586  DenseMap<SDValue, unsigned>::iterator M = VecInMap.find(ExtractedFromVec);
11587  if (M == VecInMap.end()) {
11588  VT = ExtractedFromVec.getValueType();
11589  // Quit if not 128/256-bit vector.
11590  if (!VT.is128BitVector() && !VT.is256BitVector())
11591  return SDValue();
11592  // Quit if not the same type.
11593  if (VecInMap.begin() != VecInMap.end() &&
11594  VT != VecInMap.begin()->first.getValueType())
11595  return SDValue();
11596  M = VecInMap.insert(std::make_pair(ExtractedFromVec, 0)).first;
11597  VecIns.push_back(ExtractedFromVec);
11598  }
11599  M->second |= 1U << cast<ConstantSDNode>(Idx)->getZExtValue();
11600  }
11601 
11602  assert((VT.is128BitVector() || VT.is256BitVector()) &&
11603  "Not extracted from 128-/256-bit vector.");
11604 
11605  unsigned FullMask = (1U << VT.getVectorNumElements()) - 1U;
11606 
11607  for (DenseMap<SDValue, unsigned>::const_iterator
11608  I = VecInMap.begin(), E = VecInMap.end(); I != E; ++I) {
11609  // Quit if not all elements are used.
11610  if (I->second != FullMask)
11611  return SDValue();
11612  }
11613 
11614  EVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
11615 
11616  // Cast all vectors into TestVT for PTEST.
11617  for (unsigned i = 0, e = VecIns.size(); i < e; ++i)
11618  VecIns[i] = DAG.getNode(ISD::BITCAST, DL, TestVT, VecIns[i]);
11619 
11620  // If more than one full vectors are evaluated, OR them first before PTEST.
11621  for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1; Slot += 2, e += 1) {
11622  // Each iteration will OR 2 nodes and append the result until there is only
11623  // 1 node left, i.e. the final OR'd value of all vectors.
11624  SDValue LHS = VecIns[Slot];
11625  SDValue RHS = VecIns[Slot + 1];
11626  VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS));
11627  }
11628 
11629  return DAG.getNode(X86ISD::PTEST, DL, MVT::i32,
11630  VecIns.back(), VecIns.back());
11631 }
bool hasOneUse() const
bool hasSSE41() const
Definition: X86Subtarget.h:315
const SDValue & getOperand(unsigned Num) const
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
SDNode * getNode() const
get the SDNode which holds the desired result
assert(Globals.size() > 1)
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:141
unsigned getOpcode() const
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:154
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
EVT getValueType() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:136
unsigned getVectorNumElements() const
Definition: ValueTypes.h:226
static SDValue LowerVectorBroadcast ( SDValue  Op,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

LowerVectorBroadcast - Attempt to use the vbroadcast instruction to generate a splat value for the following cases:

  1. A splat BUILD_VECTOR which uses a single scalar load, or a constant.
  2. A splat shuffle which uses a scalar_to_vector node which comes from a scalar load, or a constant. The VBROADCAST node is returned when a pattern is found, or SDValue() otherwise.

Definition at line 5743 of file X86ISelLowering.cpp.

5744  {
5745  if (!Subtarget->hasFp256())
5746  return SDValue();
5747 
5748  MVT VT = Op.getSimpleValueType();
5749  SDLoc dl(Op);
5750 
5751  assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
5752  "Unsupported vector type for broadcast.");
5753 
5754  SDValue Ld;
5755  bool ConstSplatVal;
5756 
5757  switch (Op.getOpcode()) {
5758  default:
5759  // Unknown pattern found.
5760  return SDValue();
5761 
5762  case ISD::BUILD_VECTOR: {
5763  auto *BVOp = cast<BuildVectorSDNode>(Op.getNode());
5764  BitVector UndefElements;
5765  SDValue Splat = BVOp->getSplatValue(&UndefElements);
5766 
5767  // We need a splat of a single value to use broadcast, and it doesn't
5768  // make any sense if the value is only in one element of the vector.
5769  if (!Splat || (VT.getVectorNumElements() - UndefElements.count()) <= 1)
5770  return SDValue();
5771 
5772  Ld = Splat;
5773  ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
5774  Ld.getOpcode() == ISD::ConstantFP);
5775 
5776  // Make sure that all of the users of a non-constant load are from the
5777  // BUILD_VECTOR node.
5778  if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode()))
5779  return SDValue();
5780  break;
5781  }
5782 
5783  case ISD::VECTOR_SHUFFLE: {
5784  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
5785 
5786  // Shuffles must have a splat mask where the first element is
5787  // broadcasted.
5788  if ((!SVOp->isSplat()) || SVOp->getMaskElt(0) != 0)
5789  return SDValue();
5790 
5791  SDValue Sc = Op.getOperand(0);
5792  if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
5793  Sc.getOpcode() != ISD::BUILD_VECTOR) {
5794 
5795  if (!Subtarget->hasInt256())
5796  return SDValue();
5797 
5798  // Use the register form of the broadcast instruction available on AVX2.
5799  if (VT.getSizeInBits() >= 256)
5800  Sc = Extract128BitVector(Sc, 0, DAG, dl);
5801  return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc);
5802  }
5803 
5804  Ld = Sc.getOperand(0);
5805  ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
5806  Ld.getOpcode() == ISD::ConstantFP);
5807 
5808  // The scalar_to_vector node and the suspected
5809  // load node must have exactly one user.
5810  // Constants may have multiple users.
5811 
5812  // AVX-512 has register version of the broadcast
5813  bool hasRegVer = Subtarget->hasAVX512() && VT.is512BitVector() &&
5814  Ld.getValueType().getSizeInBits() >= 32;
5815  if (!ConstSplatVal && ((!Sc.hasOneUse() || !Ld.hasOneUse()) &&
5816  !hasRegVer))
5817  return SDValue();
5818  break;
5819  }
5820  }
5821 
5822  bool IsGE256 = (VT.getSizeInBits() >= 256);
5823 
5824  // Handle the broadcasting a single constant scalar from the constant pool
5825  // into a vector. On Sandybridge it is still better to load a constant vector
5826  // from the constant pool and not to broadcast it from a scalar.
5827  if (ConstSplatVal && Subtarget->hasInt256()) {
5828  EVT CVT = Ld.getValueType();
5829  assert(!CVT.isVector() && "Must not broadcast a vector type");
5830  unsigned ScalarSize = CVT.getSizeInBits();
5831 
5832  if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)) {
5833  const Constant *C = nullptr;
5834  if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
5835  C = CI->getConstantIntValue();
5836  else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
5837  C = CF->getConstantFPValue();
5838 
5839  assert(C && "Invalid constant type");
5840 
5841  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5842  SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
5843  unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
5844  Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
5845  MachinePointerInfo::getConstantPool(),
5846  false, false, false, Alignment);
5847 
5848  return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
5849  }
5850  }
5851 
5852  bool IsLoad = ISD::isNormalLoad(Ld.getNode());
5853  unsigned ScalarSize = Ld.getValueType().getSizeInBits();
5854 
5855  // Handle AVX2 in-register broadcasts.
5856  if (!IsLoad && Subtarget->hasInt256() &&
5857  (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
5858  return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
5859 
5860  // The scalar source must be a normal load.
5861  if (!IsLoad)
5862  return SDValue();
5863 
5864  if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64))
5865  return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
5866 
5867  // The integer check is needed for the 64-bit into 128-bit so it doesn't match
5868  // double since there is no vbroadcastsd xmm
5869  if (Subtarget->hasInt256() && Ld.getValueType().isInteger()) {
5870  if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
5871  return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
5872  }
5873 
5874  // Unsupported broadcast.
5875  return SDValue();
5876 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
bool hasOneUse() const
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
unsigned getSizeInBits() const
SDValue getConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offs=0, bool isT=false, unsigned char TargetFlags=0)
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:116
int getMaskElt(unsigned Idx) const
virtual MVT getPointerTy(uint32_t=0) const
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
Definition: ValueTypes.h:111
SDNode * getNode() const
get the SDNode which holds the desired result
bool isNormalLoad(const SDNode *N)
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
const SDValue & getOperand(unsigned i) const
LLVM Constant Representation.
Definition: Constant.h:41
unsigned getOpcode() const
bool hasFp256() const
Definition: X86Subtarget.h:320
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
bool hasInt256() const
Definition: X86Subtarget.h:321
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo=nullptr, const MDNode *Ranges=nullptr)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
bool hasAVX512() const
Definition: X86Subtarget.h:319
EVT getValueType() const
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
SDValue getEntryNode() const
Definition: SelectionDAG.h:327
static SDValue LowerVectorIntExtend ( SDValue  Op,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Definition at line 9114 of file X86ISelLowering.cpp.

9115  {
9116  // PMOVZX is only available from SSE41.
9117  if (!Subtarget->hasSSE41())
9118  return SDValue();
9119 
9120  MVT VT = Op.getSimpleValueType();
9121 
9122  // Only AVX2 supports 256-bit vector integer extension.
9123  if (!Subtarget->hasInt256() && VT.is256BitVector())
9124  return SDValue();
9125 
9126  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9127  SDLoc DL(Op);
9128  SDValue V1 = Op.getOperand(0);
9129  SDValue V2 = Op.getOperand(1);
9130  unsigned NumElems = VT.getVectorNumElements();
9131 
9132  // Extending is a unary operation and the element type of the source vector
9133  // won't be equal to or larger than i64.
9134  if (V2.getOpcode() != ISD::UNDEF || !VT.isInteger() ||
9135  VT.getVectorElementType() == MVT::i64)
9136  return SDValue();
9137 
9138  // Find the expansion ratio, e.g. expanding from i8 to i32 has a ratio of 4.
9139  unsigned Shift = 1; // Start from 2, i.e. 1 << 1.
9140  while ((1U << Shift) < NumElems) {
9141  if (SVOp->getMaskElt(1U << Shift) == 1)
9142  break;
9143  Shift += 1;
9144  // The maximal ratio is 8, i.e. from i8 to i64.
9145  if (Shift > 3)
9146  return SDValue();
9147  }
9148 
9149  // Check the shuffle mask.
9150  unsigned Mask = (1U << Shift) - 1;
9151  for (unsigned i = 0; i != NumElems; ++i) {
9152  int EltIdx = SVOp->getMaskElt(i);
9153  if ((i & Mask) != 0 && EltIdx != -1)
9154  return SDValue();
9155  if ((i & Mask) == 0 && (unsigned)EltIdx != (i >> Shift))
9156  return SDValue();
9157  }
9158 
9159  unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift;
9160  MVT NeVT = MVT::getIntegerVT(NBits);
9161  MVT NVT = MVT::getVectorVT(NeVT, NumElems >> Shift);
9162 
9163  if (!DAG.getTargetLoweringInfo().isTypeLegal(NVT))
9164  return SDValue();
9165 
9166  // Simplify the operand as it's prepared to be fed into shuffle.
9167  unsigned SignificantBits = NVT.getSizeInBits() >> Shift;
9168  if (V1.getOpcode() == ISD::BITCAST &&
9171  V1.getOperand(0).getOperand(0)
9172  .getSimpleValueType().getSizeInBits() == SignificantBits) {
9173  // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
9174  SDValue V = V1.getOperand(0).getOperand(0).getOperand(0);
9175  ConstantSDNode *CIdx =
9177  // If it's foldable, i.e. normal load with single use, we will let code
9178  // selection to fold it. Otherwise, we will shorten the conversion sequence.
9179  if (CIdx && CIdx->getZExtValue() == 0 &&
9180  (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) {
9181  MVT FullVT = V.getSimpleValueType();
9182  MVT V1VT = V1.getSimpleValueType();
9183  if (FullVT.getSizeInBits() > V1VT.getSizeInBits()) {
9184  // The "ext_vec_elt" node is wider than the result node.
9185  // In this case we should extract subvector from V.
9186  // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast (extract_subvector x)).
9187  unsigned Ratio = FullVT.getSizeInBits() / V1VT.getSizeInBits();
9188  MVT SubVecVT = MVT::getVectorVT(FullVT.getVectorElementType(),
9189  FullVT.getVectorNumElements()/Ratio);
9190  V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V,
9191  DAG.getIntPtrConstant(0));
9192  }
9193  V1 = DAG.getNode(ISD::BITCAST, DL, V1VT, V);
9194  }
9195  }
9196 
9197  return DAG.getNode(ISD::BITCAST, DL, VT,
9198  DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
9199 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
bool hasOneUse() const
unsigned getSizeInBits() const
bool hasSSE41() const
Definition: X86Subtarget.h:315
int getMaskElt(unsigned Idx) const
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
SDNode * getNode() const
get the SDNode which holds the desired result
bool isTypeLegal(EVT VT) const
bool isNormalLoad(const SDNode *N)
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
unsigned getVectorNumElements() const
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
bool hasInt256() const
Definition: X86Subtarget.h:321
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:265
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
MVT getVectorElementType() const
uint64_t getZExtValue() const
static SDValue lowerVectorShuffle ( SDValue  Op,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Top-level lowering for x86 vector shuffles.

This handles decomposition, canonicalization, and lowering of all x86 vector shuffles. Most of the specific lowering strategies are encapsulated above in helper routines. The canonicalization attempts to widen shuffles to involve fewer lanes of wider elements, consolidate symmetric patterns such that only one of the two inputs needs to be tested, etc.

Definition at line 7912 of file X86ISelLowering.cpp.

7913  {
7914  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
7915  ArrayRef<int> Mask = SVOp->getMask();
7916  SDValue V1 = Op.getOperand(0);
7917  SDValue V2 = Op.getOperand(1);
7918  MVT VT = Op.getSimpleValueType();
7919  int NumElements = VT.getVectorNumElements();
7920  SDLoc dl(Op);
7921 
7922  assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
7923 
7924  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
7925  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
7926  if (V1IsUndef && V2IsUndef)
7927  return DAG.getUNDEF(VT);
7928 
7929  // When we create a shuffle node we put the UNDEF node to second operand,
7930  // but in some cases the first operand may be transformed to UNDEF.
7931  // In this case we should just commute the node.
7932  if (V1IsUndef)
7933  return DAG.getCommutedVectorShuffle(*SVOp);
7934 
7935  // Check for non-undef masks pointing at an undef vector and make the masks
7936  // undef as well. This makes it easier to match the shuffle based solely on
7937  // the mask.
7938  if (V2IsUndef)
7939  for (int M : Mask)
7940  if (M >= NumElements) {
7941  SmallVector<int, 8> NewMask(Mask.begin(), Mask.end());
7942  for (int &M : NewMask)
7943  if (M >= NumElements)
7944  M = -1;
7945  return DAG.getVectorShuffle(VT, dl, V1, V2, NewMask);
7946  }
7947 
7948  // For integer vector shuffles, try to collapse them into a shuffle of fewer
7949  // lanes but wider integers. We cap this to not form integers larger than i64
7950  // but it might be interesting to form i128 integers to handle flipping the
7951  // low and high halves of AVX 256-bit vectors.
7952  if (VT.isInteger() && VT.getScalarSizeInBits() < 64 &&
7954  SmallVector<int, 8> NewMask;
7955  for (int i = 0, Size = Mask.size(); i < Size; i += 2)
7956  NewMask.push_back(Mask[i] / 2);
7957  MVT NewVT =
7958  MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits() * 2),
7959  VT.getVectorNumElements() / 2);
7960  V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
7961  V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
7962  return DAG.getNode(ISD::BITCAST, dl, VT,
7963  DAG.getVectorShuffle(NewVT, dl, V1, V2, NewMask));
7964  }
7965 
7966  int NumV1Elements = 0, NumUndefElements = 0, NumV2Elements = 0;
7967  for (int M : SVOp->getMask())
7968  if (M < 0)
7969  ++NumUndefElements;
7970  else if (M < NumElements)
7971  ++NumV1Elements;
7972  else
7973  ++NumV2Elements;
7974 
7975  // Commute the shuffle as needed such that more elements come from V1 than
7976  // V2. This allows us to match the shuffle pattern strictly on how many
7977  // elements come from V1 without handling the symmetric cases.
7978  if (NumV2Elements > NumV1Elements)
7979  return DAG.getCommutedVectorShuffle(*SVOp);
7980 
7981  // When the number of V1 and V2 elements are the same, try to minimize the
7982  // number of uses of V2 in the low half of the vector.
7983  if (NumV1Elements == NumV2Elements) {
7984  int LowV1Elements = 0, LowV2Elements = 0;
7985  for (int M : SVOp->getMask().slice(0, NumElements / 2))
7986  if (M >= NumElements)
7987  ++LowV2Elements;
7988  else if (M >= 0)
7989  ++LowV1Elements;
7990  if (LowV2Elements > LowV1Elements)
7991  return DAG.getCommutedVectorShuffle(*SVOp);
7992  }
7993 
7994  // For each vector width, delegate to a specialized lowering routine.
7995  if (VT.getSizeInBits() == 128)
7996  return lower128BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
7997 
7998  llvm_unreachable("Unimplemented!");
7999 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getScalarSizeInBits() const
unsigned getSizeInBits() const
static bool areAdjacentMasksSequential(ArrayRef< int > Mask)
Tiny helper function to test whether adjacent masks are sequential.
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
assert(Globals.size() > 1)
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
unsigned getVectorNumElements() const
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
ArrayRef< int > getMask() const
static SDValue lower128BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, MVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG)
Dispatching routine to lower various 128-bit x86 vector shuffles.
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static SDValue LowerVSELECTtoBlend ( SDValue  Op,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Definition at line 9643 of file X86ISelLowering.cpp.

9644  {
9645  SDValue Cond = Op.getOperand(0);
9646  SDValue LHS = Op.getOperand(1);
9647  SDValue RHS = Op.getOperand(2);
9648  SDLoc dl(Op);
9649  MVT VT = Op.getSimpleValueType();
9650  MVT EltVT = VT.getVectorElementType();
9651  unsigned NumElems = VT.getVectorNumElements();
9652 
9653  // There is no blend with immediate in AVX-512.
9654  if (VT.is512BitVector())
9655  return SDValue();
9656 
9657  if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
9658  return SDValue();
9659  if (!Subtarget->hasInt256() && VT == MVT::v16i16)
9660  return SDValue();
9661 
9663  return SDValue();
9664 
9665  // Check the mask for BLEND and build the value.
9666  unsigned MaskValue = 0;
9667  if (!BUILD_VECTORtoBlendMask(cast<BuildVectorSDNode>(Cond), MaskValue))
9668  return SDValue();
9669 
9670  // Convert i32 vectors to floating point if it is not AVX2.
9671  // AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors.
9672  MVT BlendVT = VT;
9673  if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
9674  BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()),
9675  NumElems);
9676  LHS = DAG.getNode(ISD::BITCAST, dl, VT, LHS);
9677  RHS = DAG.getNode(ISD::BITCAST, dl, VT, RHS);
9678  }
9679 
9680  SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, LHS, RHS,
9681  DAG.getConstant(MaskValue, MVT::i32));
9682  return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
9683 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef...
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
unsigned getSizeInBits() const
bool hasSSE41() const
Definition: X86Subtarget.h:315
SDNode * getNode() const
get the SDNode which holds the desired result
unsigned getVectorNumElements() const
const SDValue & getOperand(unsigned i) const
BLENDI - Blend where the selector is an immediate.
bool hasInt256() const
Definition: X86Subtarget.h:321
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector, unsigned &MaskValue)
MVT getVectorElementType() const
static SDValue LowerVSETCC ( SDValue  Op,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Definition at line 12170 of file X86ISelLowering.cpp.

12171  {
12172  SDValue Op0 = Op.getOperand(0);
12173  SDValue Op1 = Op.getOperand(1);
12174  SDValue CC = Op.getOperand(2);
12175  MVT VT = Op.getSimpleValueType();
12176  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
12177  bool isFP = Op.getOperand(1).getSimpleValueType().isFloatingPoint();
12178  SDLoc dl(Op);
12179 
12180  if (isFP) {
12181 #ifndef NDEBUG
12183  assert(EltVT == MVT::f32 || EltVT == MVT::f64);
12184 #endif
12185 
12186  unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1);
12187  unsigned Opc = X86ISD::CMPP;
12188  if (Subtarget->hasAVX512() && VT.getVectorElementType() == MVT::i1) {
12189  assert(VT.getVectorNumElements() <= 16);
12190  Opc = X86ISD::CMPM;
12191  }
12192  // In the two special cases we can't handle, emit two comparisons.
12193  if (SSECC == 8) {
12194  unsigned CC0, CC1;
12195  unsigned CombineOpc;
12196  if (SetCCOpcode == ISD::SETUEQ) {
12197  CC0 = 3; CC1 = 0; CombineOpc = ISD::OR;
12198  } else {
12199  assert(SetCCOpcode == ISD::SETONE);
12200  CC0 = 7; CC1 = 4; CombineOpc = ISD::AND;
12201  }
12202 
12203  SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1,
12204  DAG.getConstant(CC0, MVT::i8));
12205  SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1,
12206  DAG.getConstant(CC1, MVT::i8));
12207  return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
12208  }
12209  // Handle all other FP comparisons here.
12210  return DAG.getNode(Opc, dl, VT, Op0, Op1,
12211  DAG.getConstant(SSECC, MVT::i8));
12212  }
12213 
12214  // Break 256-bit integer vector compare into smaller ones.
12215  if (VT.is256BitVector() && !Subtarget->hasInt256())
12216  return Lower256IntVSETCC(Op, DAG);
12217 
12218  bool MaskResult = (VT.getVectorElementType() == MVT::i1);
12219  EVT OpVT = Op1.getValueType();
12220  if (Subtarget->hasAVX512()) {
12221  if (Op1.getValueType().is512BitVector() ||
12222  (MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32))
12223  return LowerIntVSETCC_AVX512(Op, DAG, Subtarget);
12224 
12225  // In AVX-512 architecture setcc returns mask with i1 elements,
12226  // But there is no compare instruction for i8 and i16 elements.
12227  // We are not talking about 512-bit operands in this case, these
12228  // types are illegal.
12229  if (MaskResult &&
12230  (OpVT.getVectorElementType().getSizeInBits() < 32 &&
12231  OpVT.getVectorElementType().getSizeInBits() >= 8))
12232  return DAG.getNode(ISD::TRUNCATE, dl, VT,
12233  DAG.getNode(ISD::SETCC, dl, OpVT, Op0, Op1, CC));
12234  }
12235 
12236  // We are handling one of the integer comparisons here. Since SSE only has
12237  // GT and EQ comparisons for integer, swapping operands and multiple
12238  // operations may be required for some comparisons.
12239  unsigned Opc;
12240  bool Swap = false, Invert = false, FlipSigns = false, MinMax = false;
12241  bool Subus = false;
12242 
12243  switch (SetCCOpcode) {
12244  default: llvm_unreachable("Unexpected SETCC condition");
12245  case ISD::SETNE: Invert = true;
12246  case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
12247  case ISD::SETLT: Swap = true;
12248  case ISD::SETGT: Opc = X86ISD::PCMPGT; break;
12249  case ISD::SETGE: Swap = true;
12250  case ISD::SETLE: Opc = X86ISD::PCMPGT;
12251  Invert = true; break;
12252  case ISD::SETULT: Swap = true;
12253  case ISD::SETUGT: Opc = X86ISD::PCMPGT;
12254  FlipSigns = true; break;
12255  case ISD::SETUGE: Swap = true;
12256  case ISD::SETULE: Opc = X86ISD::PCMPGT;
12257  FlipSigns = true; Invert = true; break;
12258  }
12259 
12260  // Special case: Use min/max operations for SETULE/SETUGE
12261  MVT VET = VT.getVectorElementType();
12262  bool hasMinMax =
12263  (Subtarget->hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32))
12264  || (Subtarget->hasSSE2() && (VET == MVT::i8));
12265 
12266  if (hasMinMax) {
12267  switch (SetCCOpcode) {
12268  default: break;
12269  case ISD::SETULE: Opc = X86ISD::UMIN; MinMax = true; break;
12270  case ISD::SETUGE: Opc = X86ISD::UMAX; MinMax = true; break;
12271  }
12272 
12273  if (MinMax) { Swap = false; Invert = false; FlipSigns = false; }
12274  }
12275 
12276  bool hasSubus = Subtarget->hasSSE2() && (VET == MVT::i8 || VET == MVT::i16);
12277  if (!MinMax && hasSubus) {
12278  // As another special case, use PSUBUS[BW] when it's profitable. E.g. for
12279  // Op0 u<= Op1:
12280  // t = psubus Op0, Op1
12281  // pcmpeq t, <0..0>
12282  switch (SetCCOpcode) {
12283  default: break;
12284  case ISD::SETULT: {
12285  // If the comparison is against a constant we can turn this into a
12286  // setule. With psubus, setule does not require a swap. This is
12288  // beneficial because the constant in the register is no longer
12289  // clobbered as the destination so it can be hoisted out of a loop.
12289  // Only do this pre-AVX since vpcmp* is no longer destructive.
12290  if (Subtarget->hasAVX())
12291  break;
12292  SDValue ULEOp1 = ChangeVSETULTtoVSETULE(dl, Op1, DAG);
12293  if (ULEOp1.getNode()) {
12294  Op1 = ULEOp1;
12295  Subus = true; Invert = false; Swap = false;
12296  }
12297  break;
12298  }
12299  // Psubus is better than flip-sign because it requires no inversion.
12300  case ISD::SETUGE: Subus = true; Invert = false; Swap = true; break;
12301  case ISD::SETULE: Subus = true; Invert = false; Swap = false; break;
12302  }
12303 
12304  if (Subus) {
12305  Opc = X86ISD::SUBUS;
12306  FlipSigns = false;
12307  }
12308  }
12309 
12310  if (Swap)
12311  std::swap(Op0, Op1);
12312 
12313  // Check that the operation in question is available (most are plain SSE2,
12314  // but PCMPGTQ and PCMPEQQ have different requirements).
12315  if (VT == MVT::v2i64) {
12316  if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) {
12317  assert(Subtarget->hasSSE2() && "Don't know how to lower!");
12318 
12319  // First cast everything to the right type.
12320  Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
12321  Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
12322 
12323  // Since SSE has no unsigned integer comparisons, we need to flip the sign
12324  // bits of the inputs before performing those operations. The lower
12325  // compare is always unsigned.
12326  SDValue SB;
12327  if (FlipSigns) {
12328  SB = DAG.getConstant(0x80000000U, MVT::v4i32);
12329  } else {
12330  SDValue Sign = DAG.getConstant(0x80000000U, MVT::i32);
12331  SDValue Zero = DAG.getConstant(0x00000000U, MVT::i32);
12332  SB = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
12333  Sign, Zero, Sign, Zero);
12334  }
12335  Op0 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op0, SB);
12336  Op1 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op1, SB);
12337 
12338  // Emulate PCMPGTQ with (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2))
12339  SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
12340  SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1);
12341 
12342  // Create masks for only the low parts/high parts of the 64 bit integers.
12343  static const int MaskHi[] = { 1, 1, 3, 3 };
12344  static const int MaskLo[] = { 0, 0, 2, 2 };
12345  SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi);
12346  SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo);
12347  SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
12348 
12349  SDValue Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, EQHi, GTLo);
12350  Result = DAG.getNode(ISD::OR, dl, MVT::v4i32, Result, GTHi);
12351 
12352  if (Invert)
12353  Result = DAG.getNOT(dl, Result, MVT::v4i32);
12354 
12355  return DAG.getNode(ISD::BITCAST, dl, VT, Result);
12356  }
12357 
12358  if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
12359  // If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with
12360  // pcmpeqd + pshufd + pand.
12361  assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!");
12362 
12363  // First cast everything to the right type.
12364  Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
12365  Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
12366 
12367  // Do the compare.
12368  SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
12369 
12370  // Make sure the lower and upper halves are both all-ones.
12371  static const int Mask[] = { 1, 0, 3, 2 };
12372  SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask);
12373  Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf);
12374 
12375  if (Invert)
12376  Result = DAG.getNOT(dl, Result, MVT::v4i32);
12377 
12378  return DAG.getNode(ISD::BITCAST, dl, VT, Result);
12379  }
12380  }
12381 
12382  // Since SSE has no unsigned integer comparisons, we need to flip the sign
12383  // bits of the inputs before performing those operations.
12384  if (FlipSigns) {
12385  EVT EltVT = VT.getVectorElementType();
12386  SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), VT);
12387  Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB);
12388  Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SB);
12389  }
12390 
12391  SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
12392 
12393  // If the logical-not of the result is required, perform that now.
12394  if (Invert)
12395  Result = DAG.getNOT(dl, Result, VT);
12396 
12397  if (MinMax)
12398  Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Op0, Result);
12399 
12400  if (Subus)
12401  Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result,
12402  getZeroVector(VT, Subtarget, DAG, dl));
12403 
12404  return Result;
12405 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
static int translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0, SDValue &Op1)
Turns an ISD::CondCode into a value suitable for SSE floating point mask CMPs.
static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG, SDLoc dl)
static SDValue ChangeVSETULTtoVSETULE(SDLoc dl, SDValue Op1, SelectionDAG &DAG)
Try to turn a VSETULT into a VSETULE by modifying its second operand Op1. If non-trivial (for example...
bool hasSSE41() const
Definition: X86Subtarget.h:315
static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget)
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
EVT getVectorElementType() const
Definition: ValueTypes.h:217
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
#define EQ(a, b)
Definition: regexec.c:112
bool hasSSE2() const
Definition: X86Subtarget.h:312
SDNode * getNode() const
get the SDNode which holds the desired result
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
const SDValue & getOperand(unsigned i) const
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
SDValue getNOT(SDLoc DL, SDValue Val, EVT VT)
getNOT - Create a bitwise NOT operation as (XOR Val, -1).
***NAME is the name of the raw_ostream unsigned & i1
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:590
bool hasInt256() const
Definition: X86Subtarget.h:321
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
bool hasSSE42() const
Definition: X86Subtarget.h:316
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
bool hasAVX512() const
Definition: X86Subtarget.h:319
EVT getValueType() const
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
MVT getVectorElementType() const
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG)
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:368
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
Definition: ValueTypes.h:146
bool hasAVX() const
Definition: X86Subtarget.h:317
static SDValue LowerXALUO ( SDValue  Op,
SelectionDAG DAG 
)
static

Definition at line 15808 of file X86ISelLowering.cpp.

15808  {
15809  // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
15810  // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
15811  // looks for this combo and may remove the "setcc" instruction if the "setcc"
15812  // has only one use.
15813  SDNode *N = Op.getNode();
15814  SDValue LHS = N->getOperand(0);
15815  SDValue RHS = N->getOperand(1);
15816  unsigned BaseOp = 0;
15817  unsigned Cond = 0;
15818  SDLoc DL(Op);
15819  switch (Op.getOpcode()) {
15820  default: llvm_unreachable("Unknown ovf instruction!");
15821  case ISD::SADDO:
15822  // An add of one will be selected as an INC. Note that INC doesn't
15823  // set CF, so we can't do this for UADDO.
15824  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
15825  if (C->isOne()) {
15826  BaseOp = X86ISD::INC;
15827  Cond = X86::COND_O;
15828  break;
15829  }
15830  BaseOp = X86ISD::ADD;
15831  Cond = X86::COND_O;
15832  break;
15833  case ISD::UADDO:
15834  BaseOp = X86ISD::ADD;
15835  Cond = X86::COND_B;
15836  break;
15837  case ISD::SSUBO:
15838  // A subtract of one will be selected as a DEC. Note that DEC doesn't
15839  // set CF, so we can't do this for USUBO.
15840  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
15841  if (C->isOne()) {
15842  BaseOp = X86ISD::DEC;
15843  Cond = X86::COND_O;
15844  break;
15845  }
15846  BaseOp = X86ISD::SUB;
15847  Cond = X86::COND_O;
15848  break;
15849  case ISD::USUBO:
15850  BaseOp = X86ISD::SUB;
15851  Cond = X86::COND_B;
15852  break;
15853  case ISD::SMULO:
15854  BaseOp = X86ISD::SMUL;
15855  Cond = X86::COND_O;
15856  break;
15857  case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
15858  SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
15859  MVT::i32);
15860  SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
15861 
15862  SDValue SetCC =
15863  DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
15864  DAG.getConstant(X86::COND_O, MVT::i32),
15865  SDValue(Sum.getNode(), 2));
15866 
15867  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
15868  }
15869  }
15870 
15871  // Also sets EFLAGS.
15872  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
15873  SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
15874 
15875  SDValue SetCC =
15876  DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1),
15877  DAG.getConstant(Cond, MVT::i32),
15878  SDValue(Sum.getNode(), 1));
15879 
15880  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
15881 }
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
Same for subtraction.
Definition: ISDOpcodes.h:221
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
EVT getValueType(unsigned ResNo) const
SDVTList getVTList(EVT VT)
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
SDNode * getNode() const
get the SDNode which holds the desired result
unsigned getOpcode() const
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
#define N
Same for multiplication.
Definition: ISDOpcodes.h:224
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static SDValue LowerZERO_EXTEND ( SDValue  Op,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Definition at line 11165 of file X86ISelLowering.cpp.

11166  {
11167  SDLoc DL(Op);
11168  MVT VT = Op.getSimpleValueType();
11169  SDValue In = Op.getOperand(0);
11170  MVT SVT = In.getSimpleValueType();
11171 
11172  if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
11173  return LowerZERO_EXTEND_AVX512(Op, DAG);
11174 
11175  if (Subtarget->hasFp256()) {
11176  SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
11177  if (Res.getNode())
11178  return Res;
11179  }
11180 
11181  assert(!VT.is256BitVector() || !SVT.is128BitVector() ||
11183  return SDValue();
11184 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
SDNode * getNode() const
get the SDNode which holds the desired result
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
const SDValue & getOperand(unsigned i) const
bool hasFp256() const
Definition: X86Subtarget.h:320
static SDValue LowerZERO_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG)
***NAME is the name of the raw_ostream unsigned & i1
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
MVT getVectorElementType() const
static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget)
static SDValue LowerZERO_EXTEND_AVX512 ( SDValue  Op,
SelectionDAG DAG 
)
static

Definition at line 11123 of file X86ISelLowering.cpp.

11124  {
11125  MVT VT = Op->getSimpleValueType(0);
11126  SDValue In = Op->getOperand(0);
11127  MVT InVT = In.getSimpleValueType();
11128  SDLoc DL(Op);
11129  unsigned int NumElts = VT.getVectorNumElements();
11130  if (NumElts != 8 && NumElts != 16)
11131  return SDValue();
11132 
11133  if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
11134  return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
11135 
11136  EVT ExtVT = (NumElts == 8)? MVT::v8i64 : MVT::v16i32;
11137  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11138  // Now we have only mask extension
11139  assert(InVT.getVectorElementType() == MVT::i1);
11140  SDValue Cst = DAG.getTargetConstant(1, ExtVT.getScalarType());
11141  const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
11142  SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
11143  unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
11144  SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
11145  MachinePointerInfo::getConstantPool(),
11146  false, false, false, Alignment);
11147 
11148  SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, DL, ExtVT, In, Ld);
11149  if (VT.is512BitVector())
11150  return Brcst;
11151  return DAG.getNode(X86ISD::VTRUNC, DL, VT, Brcst);
11152 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
const SDValue & getOperand(unsigned Num) const
SDValue getConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offs=0, bool isT=false, unsigned char TargetFlags=0)
EVT getScalarType() const
Definition: ValueTypes.h:211
virtual MVT getPointerTy(uint32_t=0) const
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
LLVM Constant Representation.
Definition: Constant.h:41
***NAME is the name of the raw_ostream unsigned & i1
SDValue getTargetConstant(uint64_t Val, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:406
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo=nullptr, const MDNode *Ranges=nullptr)
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:265
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
MVT getVectorElementType() const
SDValue getEntryNode() const
Definition: SelectionDAG.h:327
MVT getSimpleValueType(unsigned ResNo) const
static SDValue matchAddSub ( const BuildVectorSDNode BV,
SelectionDAG DAG,
const X86Subtarget Subtarget 
)
static

Try to fold a build_vector that performs an 'addsub' into the sequence of 'vadd + vsub + blendi'.

Definition at line 6227 of file X86ISelLowering.cpp.

6228  {
6229  SDLoc DL(BV);
6230  EVT VT = BV->getValueType(0);
6231  unsigned NumElts = VT.getVectorNumElements();
6232  SDValue InVec0 = DAG.getUNDEF(VT);
6233  SDValue InVec1 = DAG.getUNDEF(VT);
6234 
6235  assert((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
6236  VT == MVT::v2f64) && "build_vector with an invalid type found!");
6237 
6238  // Don't try to emit a VSELECT that cannot be lowered into a blend.
6239  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6240  if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
6241  return SDValue();
6242 
6243  // Odd-numbered elements in the input build vector are obtained from
6244  // adding two integer/float elements.
6245  // Even-numbered elements in the input build vector are obtained from
6246  // subtracting two integer/float elements.
6247  unsigned ExpectedOpcode = ISD::FSUB;
6248  unsigned NextExpectedOpcode = ISD::FADD;
6249  bool AddFound = false;
6250  bool SubFound = false;
6251 
6252  for (unsigned i = 0, e = NumElts; i != e; i++) {
6253  SDValue Op = BV->getOperand(i);
6254 
6255  // Skip 'undef' values.
6256  unsigned Opcode = Op.getOpcode();
6257  if (Opcode == ISD::UNDEF) {
6258  std::swap(ExpectedOpcode, NextExpectedOpcode);
6259  continue;
6260  }
6261 
6262  // Early exit if we found an unexpected opcode.
6263  if (Opcode != ExpectedOpcode)
6264  return SDValue();
6265 
6266  SDValue Op0 = Op.getOperand(0);
6267  SDValue Op1 = Op.getOperand(1);
6268 
6269  // Try to match the following pattern:
6270  // (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i))
6271  // Early exit if we cannot match that sequence.
6272  if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6274  !isa<ConstantSDNode>(Op0.getOperand(1)) ||
6275  !isa<ConstantSDNode>(Op1.getOperand(1)) ||
6276  Op0.getOperand(1) != Op1.getOperand(1))
6277  return SDValue();
6278 
6279  unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
6280  if (I0 != i)
6281  return SDValue();
6282 
6283  // We found a valid add/sub node. Update the information accordingly.
6284  if (i & 1)
6285  AddFound = true;
6286  else
6287  SubFound = true;
6288 
6289  // Update InVec0 and InVec1.
6290  if (InVec0.getOpcode() == ISD::UNDEF)
6291  InVec0 = Op0.getOperand(0);
6292  if (InVec1.getOpcode() == ISD::UNDEF)
6293  InVec1 = Op1.getOperand(0);
6294 
6295  // Make sure that operands in input to each add/sub node always
6296  // come from a same pair of vectors.
6297  if (InVec0 != Op0.getOperand(0)) {
6298  if (ExpectedOpcode == ISD::FSUB)
6299  return SDValue();
6300 
6301  // FADD is commutable. Try to commute the operands
6302  // and then test again.
6303  std::swap(Op0, Op1);
6304  if (InVec0 != Op0.getOperand(0))
6305  return SDValue();
6306  }
6307 
6308  if (InVec1 != Op1.getOperand(0))
6309  return SDValue();
6310 
6311  // Update the pair of expected opcodes.
6312  std::swap(ExpectedOpcode, NextExpectedOpcode);
6313  }
6314 
6315  // Don't try to fold this build_vector into a VSELECT if it has
6316  // too many UNDEF operands.
6317  if (AddFound && SubFound && InVec0.getOpcode() != ISD::UNDEF &&
6318  InVec1.getOpcode() != ISD::UNDEF) {
6319  // Emit a sequence of vector add and sub followed by a VSELECT.
6320  // The new VSELECT will be lowered into a BLENDI.
6321  // At ISel stage, we pattern-match the sequence 'add + sub + BLENDI'
6322  // and emit a single ADDSUB instruction.
6323  SDValue Sub = DAG.getNode(ExpectedOpcode, DL, VT, InVec0, InVec1);
6324  SDValue Add = DAG.getNode(NextExpectedOpcode, DL, VT, InVec0, InVec1);
6325 
6326  // Construct the VSELECT mask.
6327  EVT MaskVT = VT.changeVectorElementTypeToInteger();
6328  EVT SVT = MaskVT.getVectorElementType();
6329  unsigned SVTBits = SVT.getSizeInBits();
6331 
6332  for (unsigned i = 0, e = NumElts; i != e; ++i) {
6333  APInt Value = i & 1 ? APInt::getNullValue(SVTBits) :
6334  APInt::getAllOnesValue(SVTBits);
6335  SDValue Constant = DAG.getConstant(Value, SVT);
6336  Ops.push_back(Constant);
6337  }
6338 
6339  SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVT, Ops);
6340  return DAG.getSelect(DL, VT, Mask, Sub, Add);
6341  }
6342 
6343  return SDValue();
6344 }
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
EVT getVectorElementType() const
Definition: ValueTypes.h:217
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
Simple binary floating point operators.
Definition: ISDOpcodes.h:227
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
LLVM Constant Representation.
Definition: Constant.h:41
unsigned getOpcode() const
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:590
Class for arbitrary precision integers.
Definition: APInt.h:75
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
SDValue getSelect(SDLoc DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Definition: SelectionDAG.h:703
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
LLVM Value Representation.
Definition: Value.h:69
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
EVT changeVectorElementTypeToInteger() const
Definition: ValueTypes.h:81
unsigned getVectorNumElements() const
Definition: ValueTypes.h:226
static bool MatchingStackOffset ( SDValue  Arg,
unsigned  Offset,
ISD::ArgFlagsTy  Flags,
MachineFrameInfo MFI,
const MachineRegisterInfo MRI,
const X86InstrInfo TII 
)
static

MatchingStackOffset - Return true if the given stack call argument is already available in the same position (relatively) of the caller's incoming argument stack.

Definition at line 3104 of file X86ISelLowering.cpp.

3106  {
// Return true if the given stack call argument is already available in the
// same position (relatively) of the caller's incoming argument stack, so a
// tail call does not need to copy it.
3107  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
// FI is filled in by exactly one of the three recognized producers below;
// INT_MAX marks "not yet found" and is asserted against at the end.
3108  int FI = INT_MAX;
3109  if (Arg.getOpcode() == ISD::CopyFromReg) {
3110  unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
// A physical register cannot be traced back to a single defining instr here.
3111  if (!TargetRegisterInfo::isVirtualRegister(VR))
3112  return false;
3113  MachineInstr *Def = MRI->getVRegDef(VR);
3114  if (!Def)
3115  return false;
3116  if (!Flags.isByVal()) {
// Non-byval case: the vreg must be a plain reload from a stack slot.
3117  if (!TII->isLoadFromStackSlot(Def, FI))
3118  return false;
3119  } else {
// Byval case: accept an LEA taking the address of a frame index, and
// compare the full byval size rather than the pointer size.
3120  unsigned Opcode = Def->getOpcode();
3121  if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) &&
3122  Def->getOperand(1).isFI()) {
3123  FI = Def->getOperand(1).getIndex();
3124  Bytes = Flags.getByValSize();
3125  } else
3126  return false;
3127  }
3128  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
3129  if (Flags.isByVal())
3130  // ByVal argument is passed in as a pointer but it's now being
3131  // dereferenced. e.g.
3132  // define @foo(%struct.X* %A) {
3133  // tail call @bar(%struct.X* byval %A)
3134  // }
3135  return false;
3136  SDValue Ptr = Ld->getBasePtr();
3137  FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
3138  if (!FINode)
3139  return false;
3140  FI = FINode->getIndex();
3141  } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
3142  FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
3143  FI = FINode->getIndex();
3144  Bytes = Flags.getByValSize();
3145  } else
3146  return false;
3147 
3148  assert(FI != INT_MAX);
// Only fixed objects live in the caller's incoming-argument area.
3149  if (!MFI->isFixedObjectIndex(FI))
3150  return false;
// The slot must match both in position and in size.
3151  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
3152 }
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const override
unsigned getByValSize() const
bool isFixedObjectIndex(int ObjectIdx) const
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
int getOpcode() const
Definition: MachineInstr.h:270
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:276
unsigned getOpcode() const
int64_t getObjectOffset(int ObjectIdx) const
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:265
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
EVT getValueType() const
MachineInstr * getVRegDef(unsigned Reg) const
int64_t getObjectSize(int ObjectIdx) const
static std::pair<unsigned, bool> matchIntegerMINMAX ( SDValue  Cond,
EVT  VT,
SDValue  LHS,
SDValue  RHS,
SelectionDAG DAG,
const X86Subtarget Subtarget 
)
static

Matches a VSELECT onto min/max or return 0 if the node doesn't match.

Definition at line 19132 of file X86ISelLowering.cpp.

19133  {
19134  if (!VT.isVector())
19135  return std::make_pair(0, false);
19136 
19137  bool NeedSplit = false;
19138  switch (VT.getSimpleVT().SimpleTy) {
19139  default: return std::make_pair(0, false);
19140  case MVT::v32i8:
19141  case MVT::v16i16:
19142  case MVT::v8i32:
19143  if (!Subtarget->hasAVX2())
19144  NeedSplit = true;
19145  if (!Subtarget->hasAVX())
19146  return std::make_pair(0, false);
19147  break;
19148  case MVT::v16i8:
19149  case MVT::v8i16:
19150  case MVT::v4i32:
19151  if (!Subtarget->hasSSE2())
19152  return std::make_pair(0, false);
19153  }
19154 
19155  // SSE2 has only a small subset of the operations.
19156  bool hasUnsigned = Subtarget->hasSSE41() ||
19157  (Subtarget->hasSSE2() && VT == MVT::v16i8);
19158  bool hasSigned = Subtarget->hasSSE41() ||
19159  (Subtarget->hasSSE2() && VT == MVT::v8i16);
19160 
19161  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
19162 
19163  unsigned Opc = 0;
19164  // Check for x CC y ? x : y.
19165  if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
19166  DAG.isEqualTo(RHS, Cond.getOperand(1))) {
19167  switch (CC) {
19168  default: break;
19169  case ISD::SETULT:
19170  case ISD::SETULE:
19171  Opc = hasUnsigned ? X86ISD::UMIN : 0; break;
19172  case ISD::SETUGT:
19173  case ISD::SETUGE:
19174  Opc = hasUnsigned ? X86ISD::UMAX : 0; break;
19175  case ISD::SETLT:
19176  case ISD::SETLE:
19177  Opc = hasSigned ? X86ISD::SMIN : 0; break;
19178  case ISD::SETGT:
19179  case ISD::SETGE:
19180  Opc = hasSigned ? X86ISD::SMAX : 0; break;
19181  }
19182  // Check for x CC y ? y : x -- a min/max with reversed arms.
19183  } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
19184  DAG.isEqualTo(RHS, Cond.getOperand(0))) {
19185  switch (CC) {
19186  default: break;
19187  case ISD::SETULT:
19188  case ISD::SETULE:
19189  Opc = hasUnsigned ? X86ISD::UMAX : 0; break;
19190  case ISD::SETUGT:
19191  case ISD::SETUGE:
19192  Opc = hasUnsigned ? X86ISD::UMIN : 0; break;
19193  case ISD::SETLT:
19194  case ISD::SETLE:
19195  Opc = hasSigned ? X86ISD::SMAX : 0; break;
19196  case ISD::SETGT:
19197  case ISD::SETGE:
19198  Opc = hasSigned ? X86ISD::SMIN : 0; break;
19199  }
19200  }
19201 
19202  return std::make_pair(Opc, NeedSplit);
19203 }
bool hasSSE41() const
Definition: X86Subtarget.h:315
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:116
SimpleValueType SimpleTy
bool hasSSE2() const
Definition: X86Subtarget.h:312
const SDValue & getOperand(unsigned i) const
bool hasAVX2() const
Definition: X86Subtarget.h:318
bool isEqualTo(SDValue A, SDValue B) const
bool hasAVX() const
Definition: X86Subtarget.h:317
MVT getSimpleVT() const
Definition: ValueTypes.h:204
/// MaterializeSETB - Materialize the carry flag (COND_B on EFLAGS) as a
/// value of type VT.
///
/// For i8 this is (and (setcc_carry), 1); for i1 the setcc_carry result is
/// truncated down to a single bit. Only i8 and i1 are accepted.
static SDValue MaterializeSETB(SDLoc DL, SDValue EFLAGS, SelectionDAG &DAG,
                               MVT VT) {
  if (VT == MVT::i8)
    return DAG.getNode(ISD::AND, DL, VT,
                       DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
                                   DAG.getConstant(X86::COND_B, MVT::i8),
                                   EFLAGS),
                       DAG.getConstant(1, VT));
  // Fixed typo in the assertion message: "SECCC" -> "SETCC".
  assert(VT == MVT::i1 && "Unexpected type for SETCC node");
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1,
                     DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
                                 DAG.getConstant(X86::COND_B, MVT::i8),
                                 EFLAGS));
}
assert(Globals.size() > 1)
***NAME is the name of the raw_ostream unsigned & i1
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:368
static bool MayFoldIntoStore ( SDValue  Op)
static

Definition at line 3374 of file X86ISelLowering.cpp.

3374  {
3375  return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
3376 }
bool hasOneUse() const
bool isNormalStore(const SDNode *N)
SDNode * getNode() const
get the SDNode which holds the desired result
use_iterator use_begin() const
static bool MayFoldLoad ( SDValue  Op)
static

Definition at line 3370 of file X86ISelLowering.cpp.

3370  {
3371  return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
3372 }
bool hasOneUse() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool isNormalLoad(const SDNode *N)
static bool MayFoldVectorLoad ( SDValue  V)
static

Definition at line 8892 of file X86ISelLowering.cpp.

8892  {
8893  while (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
8894  V = V.getOperand(0);
8895 
8896  if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
8897  V = V.getOperand(0);
8898  if (V.hasOneUse() && V.getOpcode() == ISD::BUILD_VECTOR &&
8899  V.getNumOperands() == 2 && V.getOperand(1).getOpcode() == ISD::UNDEF)
8900  // BUILD_VECTOR (load), undef
8901  V = V.getOperand(0);
8902 
8903  return MayFoldLoad(V);
8904 }
bool hasOneUse() const
unsigned getNumOperands() const
static bool MayFoldLoad(SDValue Op)
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
/// NarrowVectorLoadToElement - Replace a full vector load with a narrower
/// load of just element 'Index': offset the base address by
/// Index * element-store-size and load a single element.
///
/// NOTE(review): one source line was dropped by the doc extraction
/// (original line 9021, the getMachineMemOperand call); it is restored
/// below — confirm against upstream LLVM.
static SDValue NarrowVectorLoadToElement(LoadSDNode *Load, unsigned Index,
                                         SelectionDAG &DAG) {
  SDLoc dl(Load);
  MVT VT = Load->getSimpleValueType(0);
  MVT EVT = VT.getVectorElementType();
  SDValue Addr = Load->getOperand(1);
  // New address = base + Index * sizeof(element).
  SDValue NewAddr = DAG.getNode(
      ISD::ADD, dl, Addr.getSimpleValueType(), Addr,
      DAG.getConstant(Index * EVT.getStoreSize(), Addr.getSimpleValueType()));

  // Derive the narrowed memory operand from the original load's.
  SDValue NewLoad =
      DAG.getLoad(EVT, dl, Load->getChain(), NewAddr,
                  DAG.getMachineFunction().getMachineMemOperand(
                      Load->getMemOperand(), 0, EVT.getStoreSize()));
  return NewLoad;
}
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
const SDValue & getOperand(unsigned Num) const
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:276
unsigned getStoreSize() const
const SDValue & getChain() const
MachineMemOperand * getMemOperand() const
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo=nullptr, const MDNode *Ranges=nullptr)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f, uint64_t s, unsigned base_alignment, const MDNode *TBAAInfo=nullptr, const MDNode *Ranges=nullptr)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
MVT getVectorElementType() const
MVT getSimpleValueType(unsigned ResNo) const
static void NormalizeMask ( SmallVectorImpl< int > &  Mask,
unsigned  NumElems 
)
static

NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements that point to V2 points to its first element.

Definition at line 4956 of file X86ISelLowering.cpp.

4956  {
4957  for (unsigned i = 0; i != NumElems; ++i) {
4958  if (Mask[i] > (int)NumElems) {
4959  Mask[i] = NumElems;
4960  }
4961  }
4962 }
static SDValue NormalizeVectorShuffle ( SDValue  Op,
const X86Subtarget Subtarget,
SelectionDAG DAG 
)
static

Definition at line 9201 of file X86ISelLowering.cpp.

9202  {
// Pre-lowering canonicalization for vector shuffles: fold zero shuffles,
// splats-of-loads (vbroadcast), integer-extend patterns, and shuffles that
// can be profitably rewritten with wider elements. Returns SDValue() when
// no normalization applies.
9203  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9204  MVT VT = Op.getSimpleValueType();
9205  SDLoc dl(Op);
9206  SDValue V1 = Op.getOperand(0);
9207  SDValue V2 = Op.getOperand(1);
9208 
// A shuffle that only produces zero elements becomes a zero vector.
9209  if (isZeroShuffle(SVOp))
9210  return getZeroVector(VT, Subtarget, DAG, dl);
9211 
9212  // Handle splat operations
9213  if (SVOp->isSplat()) {
9214  // Use vbroadcast whenever the splat comes from a foldable load
9215  SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG);
9216  if (Broadcast.getNode())
9217  return Broadcast;
9218  }
9219 
9220  // Check integer expanding shuffles.
9221  SDValue NewOp = LowerVectorIntExtend(Op, Subtarget, DAG);
9222  if (NewOp.getNode())
9223  return NewOp;
9224 
9225  // If the shuffle can be profitably rewritten as a narrower shuffle, then
9226  // do it!
9227  if (VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v16i16 ||
9228  VT == MVT::v32i8) {
9229  SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
9230  if (NewOp.getNode())
// Bitcast back to the original type after shuffling with wider elements.
9231  return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
9232  } else if (VT.is128BitVector() && Subtarget->hasSSE2()) {
9233  // FIXME: Figure out a cleaner way to do this.
// When one input is all-zeros, a commuted/plain MOVL mask on the rewritten
// shuffle can be emitted as a zero-extending move (getVZextMovL).
9234  if (ISD::isBuildVectorAllZeros(V2.getNode())) {
9235  SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
9236  if (NewOp.getNode()) {
9237  MVT NewVT = NewOp.getSimpleValueType();
9238  if (isCommutedMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(),
9239  NewVT, true, false))
9240  return getVZextMovL(VT, NewVT, NewOp.getOperand(0), DAG, Subtarget,
9241  dl);
9242  }
9243  } else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
9244  SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
9245  if (NewOp.getNode()) {
9246  MVT NewVT = NewOp.getSimpleValueType();
9247  if (isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), NewVT))
9248  return getVZextMovL(VT, NewVT, NewOp.getOperand(1), DAG, Subtarget,
9249  dl);
9250  }
9251  }
9252  }
9253  return SDValue();
9254 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG, SDLoc dl)
bool isBuildVectorAllZeros(const SDNode *N)
static SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG)
static bool isMOVLMask(ArrayRef< int > Mask, EVT VT)
bool hasSSE2() const
Definition: X86Subtarget.h:312
SDNode * getNode() const
get the SDNode which holds the desired result
const SDValue & getOperand(unsigned i) const
static bool isZeroShuffle(ShuffleVectorSDNode *N)
static SDValue getVZextMovL(MVT VT, MVT OpVT, SDValue SrcOp, SelectionDAG &DAG, const X86Subtarget *Subtarget, SDLoc dl)
static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
static SDValue LowerVectorIntExtend(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static bool isCommutedMOVLMask(ArrayRef< int > Mask, MVT VT, bool V2IsSplat=false, bool V2IsUndef=false)
/// OptimizeConditionalInDecrement - Turn "x +/- (zext (setcc_e/ne (cmp y, 0)))"
/// into an ADC/SBB against "cmp y, 1", which materializes the conditional
/// increment/decrement via the carry flag.
///
/// NOTE(review): one source line was dropped by the doc extraction
/// (original line 21972, reading the SETCC condition code); it is restored
/// below — confirm against upstream LLVM.
static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);

  // Look through ZExts.
  SDValue Ext = N->getOperand(N->getOpcode() == ISD::SUB ? 1 : 0);
  if (Ext.getOpcode() != ISD::ZERO_EXTEND || !Ext.hasOneUse())
    return SDValue();

  SDValue SetCC = Ext.getOperand(0);
  if (SetCC.getOpcode() != X86ISD::SETCC || !SetCC.hasOneUse())
    return SDValue();

  // Only equality comparisons against zero are handled.
  X86::CondCode CC = (X86::CondCode)SetCC.getConstantOperandVal(0); // restored
  if (CC != X86::COND_E && CC != X86::COND_NE)
    return SDValue();

  SDValue Cmp = SetCC.getOperand(1);
  if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() ||
      !X86::isZeroNode(Cmp.getOperand(1)) ||
      !Cmp.getOperand(0).getValueType().isInteger())
    return SDValue();

  // Compare against 1 instead of 0 so the carry flag encodes the condition.
  SDValue CmpOp0 = Cmp.getOperand(0);
  SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0,
                               DAG.getConstant(1, CmpOp0.getValueType()));

  SDValue OtherVal = N->getOperand(N->getOpcode() == ISD::SUB ? 0 : 1);
  if (CC == X86::COND_NE)
    return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::ADC : X86ISD::SBB,
                       DL, OtherVal.getValueType(), OtherVal,
                       DAG.getConstant(-1ULL, OtherVal.getValueType()), NewCmp);
  return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::SBB : X86ISD::ADC,
                     DL, OtherVal.getValueType(), OtherVal,
                     DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
}
bool hasOneUse() const
unsigned getOpcode() const
const SDValue & getOperand(unsigned Num) const
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
Definition: ValueTypes.h:111
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:362
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
uint64_t getConstantOperandVal(unsigned i) const
EVT getValueType() const
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
bool isZeroNode(SDValue Elt)
/// PerformADCCombine - Strength-reduce an ADC of two zeros (whose value is
/// purely the incoming carry bit) down to "SETCC_CARRY & 1".
static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
                                 X86TargetLowering::DAGCombinerInfo &DCI) {
  // If the LHS and RHS of the ADC node are zero, then it can't overflow and
  // the result is either zero or one (depending on the input carry bit).
  // We don't have a good way to replace an EFLAGS use, so only do this when
  // the flag result (value #1) is dead right now.
  if (!X86::isZeroNode(N->getOperand(0)) ||
      !X86::isZeroNode(N->getOperand(1)) ||
      !SDValue(N, 1).use_empty())
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue CarryOut = DAG.getConstant(0, N->getValueType(1));
  // Materialize the carry bit, then mask it down to a 0/1 value.
  SDValue SetCC = DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
                              DAG.getConstant(X86::COND_B, MVT::i8),
                              N->getOperand(2));
  SDValue Res1 = DAG.getNode(ISD::AND, DL, VT, SetCC, DAG.getConstant(1, VT));
  return DCI.CombineTo(N, Res1, CarryOut);
}
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
bool use_empty() const
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
bool isZeroNode(SDValue Elt)
/// PerformAddCombine - Do target-specific dag combines on integer adds:
/// synthesize horizontal adds, then try the conditional-in/decrement fold.
static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG,
                                 const X86Subtarget *Subtarget) {
  EVT VT = N->getValueType(0);
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);

  // Try to synthesize horizontal adds from adds of shuffles: SSSE3 provides
  // 128-bit PHADD, AVX2 (Int256) the 256-bit variants.
  bool TypeHasHAdd =
      (Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
      (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32));
  if (TypeHasHAdd && isHorizontalBinOp(Op0, Op1, true))
    return DAG.getNode(X86ISD::HADD, SDLoc(N), VT, Op0, Op1);

  return OptimizeConditionalInDecrement(N, DAG);
}
const SDValue & getOperand(unsigned Num) const
static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG)
EVT getValueType(unsigned ResNo) const
HADD - Integer horizontal add.
bool hasSSSE3() const
Definition: X86Subtarget.h:314
static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, SelectionDAG &DAG, unsigned BaseIdx, unsigned LastIdx, SDValue &V0, SDValue &V1)
Return true if N implements a horizontal binop and return the operands for the horizontal binop into ...
bool hasInt256() const
Definition: X86Subtarget.h:321
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
/// PerformAndCombine - Do target-specific dag combines on ISD::AND nodes:
/// CMPEQ folds, BEXTR formation for scalar (shift + mask) patterns, and
/// ANDNP formation for vector (xor-with-ones + and) patterns.
///
/// NOTE(review): two source lines were dropped by the doc extraction
/// (original lines 20642 and 20648, the CanFoldXORWithAllOnes calls); they
/// are restored below — confirm against upstream LLVM.
static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const X86Subtarget *Subtarget) {
  EVT VT = N->getValueType(0);
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
  if (R.getNode())
    return R;

  // Create BEXTR instructions
  // BEXTR is ((X >> imm) & (2**size-1))
  if (VT == MVT::i32 || VT == MVT::i64) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    SDLoc DL(N);

    // Check for BEXTR.
    if ((Subtarget->hasBMI() || Subtarget->hasTBM()) &&
        (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)) {
      ConstantSDNode *MaskNode = dyn_cast<ConstantSDNode>(N1);
      ConstantSDNode *ShiftNode = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (MaskNode && ShiftNode) {
        uint64_t Mask = MaskNode->getZExtValue();
        uint64_t Shift = ShiftNode->getZExtValue();
        // The mask must be a contiguous run of low bits, and the extracted
        // field must fit inside the value.
        if (isMask_64(Mask)) {
          uint64_t MaskSize = CountPopulation_64(Mask);
          if (Shift + MaskSize <= VT.getSizeInBits())
            // BEXTR control = start-bit | (length << 8).
            return DAG.getNode(X86ISD::BEXTR, DL, VT, N0.getOperand(0),
                               DAG.getConstant(Shift | (MaskSize << 8), VT));
        }
      }
    } // BEXTR

    return SDValue();
  }

  // Want to form ANDNP nodes:
  // 1) In the hopes of then easily combining them with OR and AND nodes
  //    to form PBLEND/PSIGN.
  // 2) To match ANDN packed intrinsics
  if (VT != MVT::v2i64 && VT != MVT::v4i64)
    return SDValue();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDLoc DL(N);

  // Check LHS for vnot
  if (N0.getOpcode() == ISD::XOR &&
      //ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
      CanFoldXORWithAllOnes(N0.getNode())) // restored dropped line
    return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1);

  // Check RHS for vnot
  if (N1.getOpcode() == ISD::XOR &&
      //ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
      CanFoldXORWithAllOnes(N1.getNode())) // restored dropped line
    return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0);

  return SDValue();
}
bool hasBMI() const
Definition: X86Subtarget.h:338
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
bool hasTBM() const
Definition: X86Subtarget.h:332
bool isMask_64(uint64_t Value)
Definition: MathExtras.h:344
SDNode * getNode() const
get the SDNode which holds the desired result
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
unsigned CountPopulation_64(uint64_t Value)
Definition: MathExtras.h:438
static bool CanFoldXORWithAllOnes(const SDNode *N)
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:265
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
ANDNP - Bitwise Logical AND NOT of Packed FP values.
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
uint64_t getZExtValue() const
/// PerformBrCondCombine - Optimize an X86ISD::BRCOND by simplifying a
/// redundant boolean test of the EFLAGS producer (checkBoolTestSetCCCombine)
/// and rebuilding the branch on the simplified flags.
///
/// NOTE(review): one source line was dropped by the doc extraction
/// (original line 21833, reading the condition code from operand 2); it is
/// restored below — confirm against upstream LLVM.
static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    const X86Subtarget *Subtarget) {
  SDLoc DL(N);
  SDValue Chain = N->getOperand(0);
  SDValue Dest = N->getOperand(1);
  SDValue EFLAGS = N->getOperand(3);
  // The branch condition code is operand 2 of the BRCOND node; it may be
  // updated in place by checkBoolTestSetCCCombine.
  X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2)); // restored

  SDValue Flags;

  Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
  if (Flags.getNode()) {
    SDValue Cond = DAG.getConstant(CC, MVT::i8);
    return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
                       Flags);
  }

  return SDValue();
}
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC)
uint64_t getConstantOperandVal(unsigned Num) const
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
/// PerformBTCombine - Simplify the bit-index operand of an X86ISD::BT node.
/// BT ignores high bits in the bit index, so only the low log2(BitWidth)
/// bits are demanded; shrink constants / simplify accordingly.
///
/// NOTE(review): one source line was dropped by the doc extraction
/// (original line 21518, constructing the TargetLoweringOpt context); it is
/// restored below — confirm against upstream LLVM.
static SDValue PerformBTCombine(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI) {
  // BT ignores high bits in the bit index operand.
  SDValue Op1 = N->getOperand(1);
  if (Op1.hasOneUse()) {
    unsigned BitWidth = Op1.getValueSizeInBits();
    APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
    APInt KnownZero, KnownOne;
    TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), // restored
                                          !DCI.isBeforeLegalizeOps());
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) ||
        TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO))
      DCI.CommitTargetLoweringOpt(TLO);
  }
  return SDValue();
}
bool hasOneUse() const
unsigned getValueSizeInBits() const
const SDValue & getOperand(unsigned Num) const
unsigned Log2_32(uint32_t Value)
Definition: MathExtras.h:452
Class for arbitrary precision integers.
Definition: APInt.h:75
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
static SDValue PerformBUILD_VECTORCombine ( SDNode N,
SelectionDAG DAG,
const X86Subtarget Subtarget 
)
static

Definition at line 6346 of file X86ISelLowering.cpp.

6347  {
6348  SDLoc DL(N);
6349  EVT VT = N->getValueType(0);
6350  unsigned NumElts = VT.getVectorNumElements();
6351  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
6352  SDValue InVec0, InVec1;
6353 
6354  // Try to match an ADDSUB.
6355  if ((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
6356  (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) {
6357  SDValue Value = matchAddSub(BV, DAG, Subtarget);
6358  if (Value.getNode())
6359  return Value;
6360  }
6361 
6362  // Try to match horizontal ADD/SUB.
6363  unsigned NumUndefsLO = 0;
6364  unsigned NumUndefsHI = 0;
6365  unsigned Half = NumElts/2;
6366 
6367  // Count the number of UNDEF operands in the build_vector in input.
6368  for (unsigned i = 0, e = Half; i != e; ++i)
6369  if (BV->getOperand(i)->getOpcode() == ISD::UNDEF)
6370  NumUndefsLO++;
6371 
6372  for (unsigned i = Half, e = NumElts; i != e; ++i)
6373  if (BV->getOperand(i)->getOpcode() == ISD::UNDEF)
6374  NumUndefsHI++;
6375 
6376  // Early exit if this is either a build_vector of all UNDEFs or all the
6377  // operands but one are UNDEF.
6378  if (NumUndefsLO + NumUndefsHI + 1 >= NumElts)
6379  return SDValue();
6380 
6381  if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) {
6382  // Try to match an SSE3 float HADD/HSUB.
6383  if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
6384  return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
6385 
6386  if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
6387  return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
6388  } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) {
6389  // Try to match an SSSE3 integer HADD/HSUB.
6390  if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
6391  return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
6392 
6393  if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
6394  return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
6395  }
6396 
6397  if (!Subtarget->hasAVX())
6398  return SDValue();
6399 
6400  if ((VT == MVT::v8f32 || VT == MVT::v4f64)) {
6401  // Try to match an AVX horizontal add/sub of packed single/double
6402  // precision floating point values from 256-bit vectors.
6403  SDValue InVec2, InVec3;
6404  if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) &&
6405  isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) &&
6406  ((InVec0.getOpcode() == ISD::UNDEF ||
6407  InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
6408  ((InVec1.getOpcode() == ISD::UNDEF ||
6409  InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
6410  return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
6411 
6412  if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) &&
6413  isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) &&
6414  ((InVec0.getOpcode() == ISD::UNDEF ||
6415  InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
6416  ((InVec1.getOpcode() == ISD::UNDEF ||
6417  InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
6418  return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
6419  } else if (VT == MVT::v8i32 || VT == MVT::v16i16) {
6420  // Try to match an AVX2 horizontal add/sub of signed integers.
6421  SDValue InVec2, InVec3;
6422  unsigned X86Opcode;
6423  bool CanFold = true;
6424 
6425  if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
6426  isHorizontalBinOp(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) &&
6427  ((InVec0.getOpcode() == ISD::UNDEF ||
6428  InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
6429  ((InVec1.getOpcode() == ISD::UNDEF ||
6430  InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
6431  X86Opcode = X86ISD::HADD;
6432  else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) &&
6433  isHorizontalBinOp(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) &&
6434  ((InVec0.getOpcode() == ISD::UNDEF ||
6435  InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
6436  ((InVec1.getOpcode() == ISD::UNDEF ||
6437  InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
6438  X86Opcode = X86ISD::HSUB;
6439  else
6440  CanFold = false;
6441 
6442  if (CanFold) {
6443  // Fold this build_vector into a single horizontal add/sub.
6444  // Do this only if the target has AVX2.
6445  if (Subtarget->hasAVX2())
6446  return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);
6447 
6448  // Do not try to expand this build_vector into a pair of horizontal
6449  // add/sub if we can emit a pair of scalar add/sub.
6450  if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
6451  return SDValue();
6452 
6453  // Convert this build_vector into a pair of horizontal binop followed by
6454  // a concat vector.
6455  bool isUndefLO = NumUndefsLO == Half;
6456  bool isUndefHI = NumUndefsHI == Half;
6457  return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false,
6458  isUndefLO, isUndefHI);
6459  }
6460  }
6461 
6462  if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
6463  VT == MVT::v16i16) && Subtarget->hasAVX()) {
6464  unsigned X86Opcode;
6465  if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
6466  X86Opcode = X86ISD::HADD;
6467  else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
6468  X86Opcode = X86ISD::HSUB;
6469  else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
6470  X86Opcode = X86ISD::FHADD;
6471  else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
6472  X86Opcode = X86ISD::FHSUB;
6473  else
6474  return SDValue();
6475 
6476  // Don't try to expand this build_vector into a pair of horizontal add/sub
6477  // if we can simply emit a pair of scalar add/sub.
6478  if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
6479  return SDValue();
6480 
6481  // Convert this build_vector into two horizontal add/sub followed by
6482  // a concat vector.
6483  bool isUndefLO = NumUndefsLO == Half;
6484  bool isUndefHI = NumUndefsHI == Half;
6485  return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true,
6486  isUndefLO, isUndefHI);
6487  }
6488 
6489  return SDValue();
6490 }
bool hasSSE3() const
Definition: X86Subtarget.h:313
unsigned getOpcode() const
FHSUB - Floating point horizontal sub.
const SDValue & getOperand(unsigned Num) const
HSUB - Integer horizontal sub.
EVT getValueType(unsigned ResNo) const
static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, SDLoc DL, SelectionDAG &DAG, unsigned X86Opcode, bool Mode, bool isUndefLO, bool isUndefHI)
Emit a sequence of two 128-bit horizontal add/sub followed by a concat_vector.
HADD - Integer horizontal add.
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
bool hasSSSE3() const
Definition: X86Subtarget.h:314
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
SDNode * getNode() const
get the SDNode which holds the desired result
static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, SelectionDAG &DAG, unsigned BaseIdx, unsigned LastIdx, SDValue &V0, SDValue &V1)
Return true if N implements a horizontal binop and return the operands for the horizontal binop into ...
Simple binary floating point operators.
Definition: ISDOpcodes.h:227
static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG, const X86Subtarget *Subtarget)
Try to fold a build_vector that performs an 'addsub' into the sequence of 'vadd + vsub + blendi'...
bool hasAVX2() const
Definition: X86Subtarget.h:318
unsigned getOpcode() const
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
#define N
LLVM Value Representation.
Definition: Value.h:69
FHADD - Floating point horizontal add.
bool hasAVX() const
Definition: X86Subtarget.h:317
unsigned getVectorNumElements() const
Definition: ValueTypes.h:226
static SDValue PerformCMOVCombine ( SDNode N,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI,
const X86Subtarget Subtarget 
)
static

Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL].

Definition at line 19950 of file X86ISelLowering.cpp.

19952  {
19953  SDLoc DL(N);
19954 
19955  // If the flag operand isn't dead, don't touch this CMOV.
19956  if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty())
19957  return SDValue();
19958 
19959  SDValue FalseOp = N->getOperand(0);
19960  SDValue TrueOp = N->getOperand(1);
19962  SDValue Cond = N->getOperand(3);
19963 
19964  if (CC == X86::COND_E || CC == X86::COND_NE) {
19965  switch (Cond.getOpcode()) {
19966  default: break;
19967  case X86ISD::BSR:
19968  case X86ISD::BSF:
19969  // If operand of BSR / BSF are proven never zero, then ZF cannot be set.
19970  if (DAG.isKnownNeverZero(Cond.getOperand(0)))
19971  return (CC == X86::COND_E) ? FalseOp : TrueOp;
19972  }
19973  }
19974 
19975  SDValue Flags;
19976 
19977  Flags = checkBoolTestSetCCCombine(Cond, CC);
19978  if (Flags.getNode() &&
19979  // Extra check as FCMOV only supports a subset of X86 cond.
19980  (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) {
19981  SDValue Ops[] = { FalseOp, TrueOp,
19982  DAG.getConstant(CC, MVT::i8), Flags };
19983  return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
19984  }
19985 
19986  // If this is a select between two integer constants, try to do some
19987  // optimizations. Note that the operands are ordered the opposite of SELECT
19988  // operands.
19989  if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(TrueOp)) {
19990  if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(FalseOp)) {
19991  // Canonicalize the TrueC/FalseC values so that TrueC (the true value) is
19992  // larger than FalseC (the false value).
19993  if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
19995  std::swap(TrueC, FalseC);
19996  std::swap(TrueOp, FalseOp);
19997  }
19998 
19999  // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0.
20000  // This is efficient for any integer data type (including i8/i16) and
20001  // shift amount.
20002  if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
20003  Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
20004  DAG.getConstant(CC, MVT::i8), Cond);
20005 
20006  // Zero extend the condition if needed.
20007  Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);
20008 
20009  unsigned ShAmt = TrueC->getAPIntValue().logBase2();
20010  Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond,
20011  DAG.getConstant(ShAmt, MVT::i8));
20012  if (N->getNumValues() == 2) // Dead flag value?
20013  return DCI.CombineTo(N, Cond, SDValue());
20014  return Cond;
20015  }
20016 
20017  // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst. This is efficient
20018  // for any integer data type, including i8/i16.
20019  if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
20020  Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
20021  DAG.getConstant(CC, MVT::i8), Cond);
20022 
20023  // Zero extend the condition if needed.
20024  Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
20025  FalseC->getValueType(0), Cond);
20026  Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
20027  SDValue(FalseC, 0));
20028 
20029  if (N->getNumValues() == 2) // Dead flag value?
20030  return DCI.CombineTo(N, Cond, SDValue());
20031  return Cond;
20032  }
20033 
20034  // Optimize cases that will turn into an LEA instruction. This requires
20035  // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
20036  if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
20037  uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
20038  if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
20039 
20040  bool isFastMultiplier = false;
20041  if (Diff < 10) {
20042  switch ((unsigned char)Diff) {
20043  default: break;
20044  case 1: // result = add base, cond
20045  case 2: // result = lea base( , cond*2)
20046  case 3: // result = lea base(cond, cond*2)
20047  case 4: // result = lea base( , cond*4)
20048  case 5: // result = lea base(cond, cond*4)
20049  case 8: // result = lea base( , cond*8)
20050  case 9: // result = lea base(cond, cond*8)
20051  isFastMultiplier = true;
20052  break;
20053  }
20054  }
20055 
20056  if (isFastMultiplier) {
20057  APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
20058  Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
20059  DAG.getConstant(CC, MVT::i8), Cond);
20060  // Zero extend the condition if needed.
20061  Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
20062  Cond);
20063  // Scale the condition by the difference.
20064  if (Diff != 1)
20065  Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
20066  DAG.getConstant(Diff, Cond.getValueType()));
20067 
20068  // Add the base if non-zero.
20069  if (FalseC->getAPIntValue() != 0)
20070  Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
20071  SDValue(FalseC, 0));
20072  if (N->getNumValues() == 2) // Dead flag value?
20073  return DCI.CombineTo(N, Cond, SDValue());
20074  return Cond;
20075  }
20076  }
20077  }
20078  }
20079 
20080  // Handle these cases:
20081  // (select (x != c), e, c) -> select (x != c), e, x),
20082  // (select (x == c), c, e) -> select (x == c), x, e)
20083  // where the c is an integer constant, and the "select" is the combination
20084  // of CMOV and CMP.
20085  //
20086  // The rationale for this change is that the conditional-move from a constant
20087  // needs two instructions, however, conditional-move from a register needs
20088  // only one instruction.
20089  //
20090  // CAVEAT: By replacing a constant with a symbolic value, it may obscure
20091  // some instruction-combining opportunities. This opt needs to be
20092  // postponed as late as possible.
20093  //
20094  if (!DCI.isBeforeLegalize() && !DCI.isBeforeLegalizeOps()) {
20095  // the DCI.xxxx conditions are provided to postpone the optimization as
20096  // late as possible.
20097 
20098  ConstantSDNode *CmpAgainst = nullptr;
20099  if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) &&
20100  (CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) &&
20101  !isa<ConstantSDNode>(Cond.getOperand(0))) {
20102 
20103  if (CC == X86::COND_NE &&
20104  CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) {
20106  std::swap(TrueOp, FalseOp);
20107  }
20108 
20109  if (CC == X86::COND_E &&
20110  CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) {
20111  SDValue Ops[] = { FalseOp, Cond.getOperand(0),
20112  DAG.getConstant(CC, MVT::i8), Cond };
20113  return DAG.getNode(X86ISD::CMOV, DL, N->getVTList (), Ops);
20114  }
20115  }
20116  }
20117 
20118  return SDValue();
20119 }
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
bool isKnownNeverZero(SDValue Op) const
EVT getValueType(unsigned ResNo) const
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
unsigned getNumValues() const
SDValue CombineTo(SDNode *N, const std::vector< SDValue > &To, bool AddTo=true)
SDNode * getNode() const
get the SDNode which holds the desired result
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC)
uint64_t getConstantOperandVal(unsigned Num) const
bool use_empty() const
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:590
Class for arbitrary precision integers.
Definition: APInt.h:75
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:362
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
EVT getValueType() const
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static bool hasFPCMov(unsigned X86CC)
static SDValue PerformEXTRACT_VECTOR_ELTCombine ( SDNode N,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI 
)
static

PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index generation and convert it from being a bunch of shuffles and extracts to a simple store and scalar loads to extract the elements.

Definition at line 19036 of file X86ISelLowering.cpp.

19037  {
19038  SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI);
19039  if (NewOp.getNode())
19040  return NewOp;
19041 
19042  SDValue InputVector = N->getOperand(0);
19043 
19044  // Detect whether we are trying to convert from mmx to i32 and the bitcast
19045  // from mmx to v2i32 has a single usage.
19046  if (InputVector.getNode()->getOpcode() == llvm::ISD::BITCAST &&
19047  InputVector.getNode()->getOperand(0).getValueType() == MVT::x86mmx &&
19048  InputVector.hasOneUse() && N->getValueType(0) == MVT::i32)
19049  return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector),
19050  N->getValueType(0),
19051  InputVector.getNode()->getOperand(0));
19052 
19053  // Only operate on vectors of 4 elements, where the alternative shuffling
19054  // gets to be more expensive.
19055  if (InputVector.getValueType() != MVT::v4i32)
19056  return SDValue();
19057 
19058  // Check whether every use of InputVector is an EXTRACT_VECTOR_ELT with a
19059  // single use which is a sign-extend or zero-extend, and all elements are
19060  // used.
19062  unsigned ExtractedElements = 0;
19063  for (SDNode::use_iterator UI = InputVector.getNode()->use_begin(),
19064  UE = InputVector.getNode()->use_end(); UI != UE; ++UI) {
19065  if (UI.getUse().getResNo() != InputVector.getResNo())
19066  return SDValue();
19067 
19068  SDNode *Extract = *UI;
19069  if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
19070  return SDValue();
19071 
19072  if (Extract->getValueType(0) != MVT::i32)
19073  return SDValue();
19074  if (!Extract->hasOneUse())
19075  return SDValue();
19076  if (Extract->use_begin()->getOpcode() != ISD::SIGN_EXTEND &&
19077  Extract->use_begin()->getOpcode() != ISD::ZERO_EXTEND)
19078  return SDValue();
19079  if (!isa<ConstantSDNode>(Extract->getOperand(1)))
19080  return SDValue();
19081 
19082  // Record which element was extracted.
19083  ExtractedElements |=
19084  1 << cast<ConstantSDNode>(Extract->getOperand(1))->getZExtValue();
19085 
19086  Uses.push_back(Extract);
19087  }
19088 
19089  // If not all the elements were used, this may not be worthwhile.
19090  if (ExtractedElements != 15)
19091  return SDValue();
19092 
19093  // Ok, we've now decided to do the transformation.
19094  SDLoc dl(InputVector);
19095 
19096  // Store the value to a temporary stack slot.
19097  SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
19098  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr,
19099  MachinePointerInfo(), false, false, 0);
19100 
19101  // Replace each use (extract) with a load of the appropriate element.
19102  for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
19103  UE = Uses.end(); UI != UE; ++UI) {
19104  SDNode *Extract = *UI;
19105 
 19106  // Compute the element's address.
19107  SDValue Idx = Extract->getOperand(1);
19108  unsigned EltSize =
19109  InputVector.getValueType().getVectorElementType().getSizeInBits()/8;
19110  uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
19111  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19112  SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
19113 
19114  SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
19115  StackPtr, OffsetVal);
19116 
19117  // Load the scalar.
19118  SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
19119  ScalarAddr, MachinePointerInfo(),
19120  false, false, false, 0);
19121 
 19122  // Replace the extract with the load.
19123  DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
19124  }
19125 
19126  // The replacement was made in place; don't return anything.
19127  return SDValue();
19128 }
SuperClass::iterator iterator
Definition: SmallVector.h:351
bool hasOneUse() const
bool hasOneUse() const
unsigned getOpcode() const
const SDValue & getOperand(unsigned Num) const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
EVT getValueType(unsigned ResNo) const
virtual MVT getPointerTy(uint32_t=0) const
EVT getVectorElementType() const
Definition: ValueTypes.h:217
SDNode * getNode() const
get the SDNode which holds the desired result
Extract files back to file system.
Definition: llvm-ar.cpp:110
use_iterator use_begin() const
SDValue getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo=nullptr)
SDValue CreateStackTemporary(EVT VT, unsigned minAlign=1)
static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:362
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo=nullptr, const MDNode *Ranges=nullptr)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
EVT getValueType() const
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
SDValue getEntryNode() const
Definition: SelectionDAG.h:327
static SDValue PerformFADDCombine ( SDNode N,
SelectionDAG DAG,
const X86Subtarget Subtarget 
)
static

PerformFADDCombine - Do target-specific dag combines on floating point adds.

Definition at line 21417 of file X86ISelLowering.cpp.

21418  {
21419  EVT VT = N->getValueType(0);
21420  SDValue LHS = N->getOperand(0);
21421  SDValue RHS = N->getOperand(1);
21422 
21423  // Try to synthesize horizontal adds from adds of shuffles.
21424  if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
21425  (Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
21426  isHorizontalBinOp(LHS, RHS, true))
21427  return DAG.getNode(X86ISD::FHADD, SDLoc(N), VT, LHS, RHS);
21428  return SDValue();
21429 }
bool hasSSE3() const
Definition: X86Subtarget.h:313
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, SelectionDAG &DAG, unsigned BaseIdx, unsigned LastIdx, SDValue &V0, SDValue &V1)
Return true if N implements a horizontal binop and return the operands for the horizontal binop into ...
bool hasFp256() const
Definition: X86Subtarget.h:320
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
FHADD - Floating point horizontal add.
static SDValue PerformFANDCombine ( SDNode N,
SelectionDAG DAG 
)
static

PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.

Definition at line 21484 of file X86ISelLowering.cpp.

21484  {
21485  // FAND(0.0, x) -> 0.0
21486  // FAND(x, 0.0) -> 0.0
21487  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
21488  if (C->getValueAPF().isPosZero())
21489  return N->getOperand(0);
21490  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
21491  if (C->getValueAPF().isPosZero())
21492  return N->getOperand(1);
21493  return SDValue();
21494 }
const SDValue & getOperand(unsigned Num) const
static SDValue PerformFANDNCombine ( SDNode N,
SelectionDAG DAG 
)
static

PerformFANDNCombine - Do target-specific dag combines on X86ISD::FANDN nodes.

Definition at line 21497 of file X86ISelLowering.cpp.

21497  {
21498  // FANDN(x, 0.0) -> 0.0
21499  // FANDN(0.0, x) -> x
21500  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
21501  if (C->getValueAPF().isPosZero())
21502  return N->getOperand(1);
21503  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
21504  if (C->getValueAPF().isPosZero())
21505  return N->getOperand(1);
21506  return SDValue();
21507 }
const SDValue & getOperand(unsigned Num) const
static SDValue PerformFMACombine ( SDNode N,
SelectionDAG DAG,
const X86Subtarget Subtarget 
)
static

Definition at line 21595 of file X86ISelLowering.cpp.

21596  {
21597  SDLoc dl(N);
21598  EVT VT = N->getValueType(0);
21599 
21600  // Let legalize expand this if it isn't a legal type yet.
21601  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
21602  return SDValue();
21603 
21604  EVT ScalarVT = VT.getScalarType();
21605  if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
21606  (!Subtarget->hasFMA() && !Subtarget->hasFMA4()))
21607  return SDValue();
21608 
21609  SDValue A = N->getOperand(0);
21610  SDValue B = N->getOperand(1);
21611  SDValue C = N->getOperand(2);
21612 
21613  bool NegA = (A.getOpcode() == ISD::FNEG);
21614  bool NegB = (B.getOpcode() == ISD::FNEG);
21615  bool NegC = (C.getOpcode() == ISD::FNEG);
21616 
21617  // Negative multiplication when NegA xor NegB
21618  bool NegMul = (NegA != NegB);
21619  if (NegA)
21620  A = A.getOperand(0);
21621  if (NegB)
21622  B = B.getOperand(0);
21623  if (NegC)
21624  C = C.getOperand(0);
21625 
21626  unsigned Opcode;
21627  if (!NegMul)
21628  Opcode = (!NegC) ? X86ISD::FMADD : X86ISD::FMSUB;
21629  else
21630  Opcode = (!NegC) ? X86ISD::FNMADD : X86ISD::FNMSUB;
21631 
21632  return DAG.getNode(Opcode, dl, VT, A, B, C);
21633 }
bool hasFMA4() const
Definition: X86Subtarget.h:330
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
EVT getScalarType() const
Definition: ValueTypes.h:211
bool isTypeLegal(EVT VT) const
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
Definition: test.h:1
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
bool hasFMA() const
Definition: X86Subtarget.h:328
static SDValue PerformFMinFMaxCombine ( SDNode N,
SelectionDAG DAG 
)
static

PerformFMinFMaxCombine - Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes.

Definition at line 21463 of file X86ISelLowering.cpp.

21463  {
21464  assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);
21465 
21466  // Only perform optimizations if UnsafeMath is used.
21467  if (!DAG.getTarget().Options.UnsafeFPMath)
21468  return SDValue();
21469 
21470  // If we run in unsafe-math mode, then convert the FMAX and FMIN nodes
21471  // into FMINC and FMAXC, which are Commutative operations.
21472  unsigned NewOp = 0;
21473  switch (N->getOpcode()) {
21474  default: llvm_unreachable("unknown opcode");
21475  case X86ISD::FMIN: NewOp = X86ISD::FMINC; break;
21476  case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break;
21477  }
21478 
21479  return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0),
21480  N->getOperand(0), N->getOperand(1));
21481 }
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:277
unsigned getOpcode() const
const SDValue & getOperand(unsigned Num) const
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
EVT getValueType(unsigned ResNo) const
FMAXC, FMINC - Commutative FMIN and FMAX.
assert(Globals.size() > 1)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static SDValue PerformFORCombine ( SDNode N,
SelectionDAG DAG 
)
static

PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.

Definition at line 21448 of file X86ISelLowering.cpp.

21448  {
21449  assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
21450  // F[X]OR(0.0, x) -> x
21451  // F[X]OR(x, 0.0) -> x
21452  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
21453  if (C->getValueAPF().isPosZero())
21454  return N->getOperand(1);
21455  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
21456  if (C->getValueAPF().isPosZero())
21457  return N->getOperand(0);
21458  return SDValue();
21459 }
unsigned getOpcode() const
const SDValue & getOperand(unsigned Num) const
assert(Globals.size() > 1)
static SDValue PerformFSUBCombine ( SDNode N,
SelectionDAG DAG,
const X86Subtarget Subtarget 
)
static

PerformFSUBCombine - Do target-specific dag combines on floating point subs.

Definition at line 21432 of file X86ISelLowering.cpp.

21433  {
21434  EVT VT = N->getValueType(0);
21435  SDValue LHS = N->getOperand(0);
21436  SDValue RHS = N->getOperand(1);
21437 
21438  // Try to synthesize horizontal subs from subs of shuffles.
21439  if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
21440  (Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
21441  isHorizontalBinOp(LHS, RHS, false))
21442  return DAG.getNode(X86ISD::FHSUB, SDLoc(N), VT, LHS, RHS);
21443  return SDValue();
21444 }
bool hasSSE3() const
Definition: X86Subtarget.h:313
FHSUB - Floating point horizontal sub.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, SelectionDAG &DAG, unsigned BaseIdx, unsigned LastIdx, SDValue &V0, SDValue &V1)
Return true if N implements a horizontal binop and return the operands for the horizontal binop into ...
bool hasFp256() const
Definition: X86Subtarget.h:320
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static SDValue PerformINSERTPSCombine ( SDNode N,
SelectionDAG DAG,
const X86Subtarget Subtarget 
)
static

Definition at line 21738 of file X86ISelLowering.cpp.

21739  {
21740  SDLoc dl(N);
21741  MVT VT = N->getOperand(1)->getSimpleValueType(0);
21742  assert((VT == MVT::v4f32 || VT == MVT::v4i32) &&
21743  "X86insertps is only defined for v4x32");
21744 
21745  SDValue Ld = N->getOperand(1);
21746  if (MayFoldLoad(Ld)) {
21747  // Extract the countS bits from the immediate so we can get the proper
21748  // address when narrowing the vector load to a specific element.
 21749  // When the second source op is a memory address, insertps doesn't use
21750  // countS and just gets an f32 from that address.
21751  unsigned DestIndex =
21752  cast<ConstantSDNode>(N->getOperand(2))->getZExtValue() >> 6;
21753  Ld = NarrowVectorLoadToElement(cast<LoadSDNode>(Ld), DestIndex, DAG);
21754  } else
21755  return SDValue();
21756 
21757  // Create this as a scalar to vector to match the instruction pattern.
21758  SDValue LoadScalarToVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Ld);
21759  // countS bits are ignored when loading from memory on insertps, which
21760  // means we don't need to explicitly set them to 0.
21761  return DAG.getNode(X86ISD::INSERTPS, dl, VT, N->getOperand(0),
21762  LoadScalarToVector, N->getOperand(2));
21763 }
static SDValue NarrowVectorLoadToElement(LoadSDNode *Load, unsigned Index, SelectionDAG &DAG)
const SDValue & getOperand(unsigned Num) const
static bool MayFoldLoad(SDValue Op)
assert(Globals.size() > 1)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
MVT getSimpleValueType(unsigned ResNo) const
static SDValue performIntegerAbsCombine ( SDNode N,
SelectionDAG DAG 
)
static

Definition at line 20816 of file X86ISelLowering.cpp.

20816  {
20817  EVT VT = N->getValueType(0);
20818 
20819  // Since X86 does not have CMOV for 8-bit integer, we don't convert
20820  // 8-bit integer abs to NEG and CMOV.
20821  if (VT.isInteger() && VT.getSizeInBits() == 8)
20822  return SDValue();
20823 
20824  SDValue N0 = N->getOperand(0);
20825  SDValue N1 = N->getOperand(1);
20826  SDLoc DL(N);
20827 
20828  // Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1)
20829  // and change it to SUB and CMOV.
20830  if (VT.isInteger() && N->getOpcode() == ISD::XOR &&
20831  N0.getOpcode() == ISD::ADD &&
20832  N0.getOperand(1) == N1 &&
20833  N1.getOpcode() == ISD::SRA &&
20834  N1.getOperand(0) == N0.getOperand(0))
20835  if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1)))
20836  if (Y1C->getAPIntValue() == VT.getSizeInBits()-1) {
20837  // Generate SUB & CMOV.
20838  SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),
20839  DAG.getConstant(0, VT), N0.getOperand(0));
20840 
20841  SDValue Ops[] = { N0.getOperand(0), Neg,
20842  DAG.getConstant(X86::COND_GE, MVT::i8),
20843  SDValue(Neg.getNode(), 1) };
20844  return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue), Ops);
20845  }
20846  return SDValue();
20847 }
unsigned getOpcode() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
SDVTList getVTList(EVT VT)
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
Definition: ValueTypes.h:111
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
SDNode * getNode() const
get the SDNode which holds the desired result
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static SDValue PerformINTRINSIC_WO_CHAINCombine ( SDNode N,
SelectionDAG DAG,
const X86Subtarget Subtarget 
)
static

Definition at line 20121 of file X86ISelLowering.cpp.

// PerformINTRINSIC_WO_CHAINCombine - Simplify selected X86
// INTRINSIC_WO_CHAIN nodes: fold trivial SSE/AVX/AVX2 blend
// intrinsics and turn constant-amount packed arithmetic-shift
// intrinsics into a target-independent ISD::SRA. Returns SDValue()
// when nothing could be simplified.
20122  {
// Operand 0 of an INTRINSIC_WO_CHAIN node is the intrinsic ID constant.
20123  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
20124  switch (IntNo) {
20125  default: return SDValue();
20126  // SSE/AVX/AVX2 blend intrinsics.
// The cases are ordered so the subtarget feature checks cascade:
// AVX2 intrinsics fall through into the AVX check, which falls
// through into the shared SSE4.1 blend-folding body below.
20127  case Intrinsic::x86_avx2_pblendvb:
20128  case Intrinsic::x86_avx2_pblendw:
20129  case Intrinsic::x86_avx2_pblendd_128:
20130  case Intrinsic::x86_avx2_pblendd_256:
20131  // Don't try to simplify this intrinsic if we don't have AVX2.
20132  if (!Subtarget->hasAVX2())
20133  return SDValue();
20134  // FALL-THROUGH
20135  case Intrinsic::x86_avx_blend_pd_256:
20136  case Intrinsic::x86_avx_blend_ps_256:
20137  case Intrinsic::x86_avx_blendv_pd_256:
20138  case Intrinsic::x86_avx_blendv_ps_256:
20139  // Don't try to simplify this intrinsic if we don't have AVX.
20140  if (!Subtarget->hasAVX())
20141  return SDValue();
20142  // FALL-THROUGH
20143  case Intrinsic::x86_sse41_pblendw:
20144  case Intrinsic::x86_sse41_blendpd:
20145  case Intrinsic::x86_sse41_blendps:
20146  case Intrinsic::x86_sse41_blendvps:
20147  case Intrinsic::x86_sse41_blendvpd:
20148  case Intrinsic::x86_sse41_pblendvb: {
// Blend intrinsic operands: (id, src0, src1, mask).
20149  SDValue Op0 = N->getOperand(1);
20150  SDValue Op1 = N->getOperand(2);
20151  SDValue Mask = N->getOperand(3);
20152 
20153  // Don't try to simplify this intrinsic if we don't have SSE4.1.
20154  if (!Subtarget->hasSSE41())
20155  return SDValue();
20156 
20157  // fold (blend A, A, Mask) -> A
20158  if (Op0 == Op1)
20159  return Op0;
20160  // fold (blend A, B, allZeros) -> A
20161  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
20162  return Op0;
20163  // fold (blend A, B, allOnes) -> B
20164  if (ISD::isBuildVectorAllOnes(Mask.getNode()))
20165  return Op1;
20166 
20167  // Simplify the case where the mask is a constant i32 value.
20168  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mask)) {
20169  if (C->isNullValue())
20170  return Op0;
20171  if (C->isAllOnesValue())
20172  return Op1;
20173  }
20174 
20175  return SDValue();
20176  }
20177 
20178  // Packed SSE2/AVX2 arithmetic shift immediate intrinsics.
20179  case Intrinsic::x86_sse2_psrai_w:
20180  case Intrinsic::x86_sse2_psrai_d:
20181  case Intrinsic::x86_avx2_psrai_w:
20182  case Intrinsic::x86_avx2_psrai_d:
20183  case Intrinsic::x86_sse2_psra_w:
20184  case Intrinsic::x86_sse2_psra_d:
20185  case Intrinsic::x86_avx2_psra_w:
20186  case Intrinsic::x86_avx2_psra_d: {
20187  SDValue Op0 = N->getOperand(1);
20188  SDValue Op1 = N->getOperand(2);
20189  EVT VT = Op0.getValueType();
20190  assert(VT.isVector() && "Expected a vector type!");
20191 
// For the non-immediate psra forms the count is a vector; peel off
// element 0 of the build_vector to get at the scalar amount.
20192  if (isa<BuildVectorSDNode>(Op1))
20193  Op1 = Op1.getOperand(0);
20194 
// Only constant shift amounts can be rewritten here.
20195  if (!isa<ConstantSDNode>(Op1))
20196  return SDValue();
20197 
20198  EVT SVT = VT.getVectorElementType();
20199  unsigned SVTBits = SVT.getSizeInBits();
20200 
20201  ConstantSDNode *CND = cast<ConstantSDNode>(Op1);
// Note: binds a const reference to a temporary APInt; this is legal
// (lifetime extension) but a plain value would read more clearly.
20202  const APInt &C = APInt(SVTBits, CND->getAPIntValue().getZExtValue());
20203  uint64_t ShAmt = C.getZExtValue();
20204 
20205  // Don't try to convert this shift into a ISD::SRA if the shift
20206  // count is bigger than or equal to the element size.
20207  if (ShAmt >= SVTBits)
20208  return SDValue();
20209 
20210  // Trivial case: if the shift count is zero, then fold this
20211  // into the first operand.
20212  if (ShAmt == 0)
20213  return Op0;
20214 
20215  // Replace this packed shift intrinsic with a target independent
20216  // shift dag node.
20217  SDValue Splat = DAG.getConstant(C, VT);
20218  return DAG.getNode(ISD::SRA, SDLoc(N), VT, Op0, Splat);
20219  }
20220  }
20221 }
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1302
bool hasSSE41() const
Definition: X86Subtarget.h:315
const SDValue & getOperand(unsigned Num) const
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:116
bool isBuildVectorAllZeros(const SDNode *N)
EVT getVectorElementType() const
Definition: ValueTypes.h:217
const APInt & getAPIntValue() const
SDNode * getNode() const
get the SDNode which holds the desired result
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
bool hasAVX2() const
Definition: X86Subtarget.h:318
bool isBuildVectorAllOnes(const SDNode *N)
Node predicates.
Class for arbitrary precision integers.
Definition: APInt.h:75
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
EVT getValueType() const
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
bool hasAVX() const
Definition: X86Subtarget.h:317
static SDValue PerformISDSETCCCombine ( SDNode N,
SelectionDAG DAG,
const X86Subtarget Subtarget 
)
static

Definition at line 21683 of file X86ISelLowering.cpp.

// PerformISDSETCCCombine - Target-specific combines on ISD::SETCC:
//   (1) setcc eq/ne (sub 0, X), Y  ->  setcc eq/ne (add Y, X), 0
//       (and the mirrored form), which exposes the cheap ADD+flags
//       pattern on X86.
//   (2) For i1-typed results, simplify comparisons between a
//       sign-extended i1 vector and an all-zeros vector into the
//       operand itself (or its bitwise NOT for SETEQ).
// Returns SDValue() when no fold applies.
21684  {
21685  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
21686  SDValue LHS = N->getOperand(0);
21687  SDValue RHS = N->getOperand(1);
21688  EVT VT = N->getValueType(0);
21689  SDLoc DL(N);
21690 
// Fold (seteq/ne (sub 0, X), Y) -> (seteq/ne (add Y, X), 0).
// Requires hasOneUse so the original SUB is not kept alive as well.
21691  if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
21692  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
21693  if (C->getAPIntValue() == 0 && LHS.hasOneUse()) {
21694  SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N),
21695  LHS.getValueType(), RHS, LHS.getOperand(1));
21696  return DAG.getSetCC(SDLoc(N), N->getValueType(0),
21697  addV, DAG.getConstant(0, addV.getValueType()), CC);
21698  }
// Mirrored form: (seteq/ne X, (sub 0, Y)) -> (seteq/ne (add X, Y), 0).
21699  if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB)
21700  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS.getOperand(0)))
21701  if (C->getAPIntValue() == 0 && RHS.hasOneUse()) {
21702  SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N),
21703  RHS.getValueType(), LHS, RHS.getOperand(1));
21704  return DAG.getSetCC(SDLoc(N), N->getValueType(0),
21705  addV, DAG.getConstant(0, addV.getValueType()), CC);
21706  }
21707 
// i1 (mask) comparisons: only the SEXT-of-i1 vs all-zeros pairings
// are handled; every other operand shape bails out early.
21708  if (VT.getScalarType() == MVT::i1) {
21709  bool IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
21710  (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
21711  bool IsVZero0 = ISD::isBuildVectorAllZeros(LHS.getNode());
21712  if (!IsSEXT0 && !IsVZero0)
21713  return SDValue();
21714  bool IsSEXT1 = (RHS.getOpcode() == ISD::SIGN_EXTEND) &&
21715  (RHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
21716  bool IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode());
21717 
21718  if (!IsSEXT1 && !IsVZero1)
21719  return SDValue();
21720 
// (setne (sext X), 0) -> X ; (seteq (sext X), 0) -> not X.
21721  if (IsSEXT0 && IsVZero1) {
21722  assert(VT == LHS.getOperand(0).getValueType() && "Uexpected operand type");
21723  if (CC == ISD::SETEQ)
21724  return DAG.getNOT(DL, LHS.getOperand(0), VT);
21725  return LHS.getOperand(0);
21726  }
// Same fold with the operands swapped.
21727  if (IsSEXT1 && IsVZero0) {
21728  assert(VT == RHS.getOperand(0).getValueType() && "Uexpected operand type");
21729  if (CC == ISD::SETEQ)
21730  return DAG.getNOT(DL, RHS.getOperand(0), VT);
21731  return RHS.getOperand(0);
21732  }
21733  }
21734 
21735  return SDValue();
21736 }
bool hasOneUse() const
const SDValue & getOperand(unsigned Num) const
bool isBuildVectorAllZeros(const SDNode *N)
EVT getValueType(unsigned ResNo) const
EVT getScalarType() const
Definition: ValueTypes.h:211
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
SDNode * getNode() const
get the SDNode which holds the desired result
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
SDValue getNOT(SDLoc DL, SDValue Val, EVT VT)
getNOT - Create a bitwise NOT operation as (XOR Val, -1).
***NAME is the name of the raw_ostream unsigned & i1
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
EVT getValueType() const
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSetCC(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Definition: SelectionDAG.h:690
static SDValue PerformLOADCombine ( SDNode N,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI,
const X86Subtarget Subtarget 
)
static

PerformLOADCombine - Do target-specific dag combines on LOAD nodes.

Definition at line 20866 of file X86ISelLowering.cpp.

// PerformLOADCombine - Do target-specific dag combines on LOAD nodes:
//   (1) split unaligned 256-bit non-extending loads into two 128-bit
//       halves on targets without fast unaligned 256-bit loads, and
//   (2) lower vector EXTLOAD/SEXTLOAD into scalar loads plus a
//       shuffle (or VSEXT / shift for sign-extension).
// Returns SDValue() when no combine applies.
20868  {
20869  LoadSDNode *Ld = cast<LoadSDNode>(N);
20870  EVT RegVT = Ld->getValueType(0);
20871  EVT MemVT = Ld->getMemoryVT();
20872  SDLoc dl(Ld);
20873  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20874  unsigned RegSz = RegVT.getSizeInBits();
20875 
20876  // On Sandybridge unaligned 256bit loads are inefficient.
// NOTE(review): the extraction dropped original line 20877, which
// declares 'Ext' used below — presumably
//   ISD::LoadExtType Ext = Ld->getExtensionType();
// Confirm against the original X86ISelLowering.cpp.
20878  unsigned Alignment = Ld->getAlignment();
// Alignment == 0 means "ABI alignment", which is treated as aligned.
20879  bool IsAligned = Alignment == 0 || Alignment >= MemVT.getSizeInBits()/8;
20880  if (RegVT.is256BitVector() && !Subtarget->hasInt256() &&
20881  !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) {
20882  unsigned NumElems = RegVT.getVectorNumElements();
20883  if (NumElems < 2)
20884  return SDValue();
20885 
20886  SDValue Ptr = Ld->getBasePtr();
20887  SDValue Increment = DAG.getConstant(16, TLI.getPointerTy());
20888 
// Load each 128-bit half separately and stitch them back together.
20889  EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
20890  NumElems/2);
20891  SDValue Load1 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
20892  Ld->getPointerInfo(), Ld->isVolatile(),
20893  Ld->isNonTemporal(), Ld->isInvariant(),
20894  Alignment);
20895  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
// The second half starts 16 bytes in, so its alignment is capped at 16.
20896  SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
20897  Ld->getPointerInfo(), Ld->isVolatile(),
20898  Ld->isNonTemporal(), Ld->isInvariant(),
20899  std::min(16U, Alignment));
// TokenFactor merges the two load chains so ordering is preserved.
20900  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
20901  Load1.getValue(1),
20902  Load2.getValue(1));
20903 
20904  SDValue NewVec = DAG.getUNDEF(RegVT);
20905  NewVec = Insert128BitVector(NewVec, Load1, 0, DAG, dl);
20906  NewVec = Insert128BitVector(NewVec, Load2, NumElems/2, DAG, dl);
20907  return DCI.CombineTo(N, NewVec, TF, true);
20908  }
20909 
20910  // If this is a vector EXT Load then attempt to optimize it using a
20911  // shuffle. If SSSE3 is not available we may emit an illegal shuffle but the
20912  // expansion is still better than scalar code.
20913  // We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise we'll
20914  // emit a shuffle and an arithmetic shift.
20915  // TODO: It is possible to support ZExt by zeroing the undef values
20916  // during the shuffle phase or after the shuffle.
20917  if (RegVT.isVector() && RegVT.isInteger() && Subtarget->hasSSE2() &&
20918  (Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD)) {
20919  assert(MemVT != RegVT && "Cannot extend to the same type");
20920  assert(MemVT.isVector() && "Must load a vector from memory");
20921 
20922  unsigned NumElems = RegVT.getVectorNumElements();
20923  unsigned MemSz = MemVT.getSizeInBits();
20924  assert(RegSz > MemSz && "Register size must be greater than the mem size");
20925 
20926  if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget->hasInt256())
20927  return SDValue();
20928 
20929  // All sizes must be a power of two.
20930  if (!isPowerOf2_32(RegSz * MemSz * NumElems))
20931  return SDValue();
20932 
20933  // Attempt to load the original value using scalar loads.
20934  // Find the largest scalar type that divides the total loaded size.
20935  MVT SclrLoadTy = MVT::i8;
20936  for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
20937  tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
20938  MVT Tp = (MVT::SimpleValueType)tp;
20939  if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) {
20940  SclrLoadTy = Tp;
20941  }
20942  }
20943 
20944  // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64.
20945  if (TLI.isTypeLegal(MVT::f64) && SclrLoadTy.getSizeInBits() < 64 &&
20946  (64 <= MemSz))
20947  SclrLoadTy = MVT::f64;
20948 
20949  // Calculate the number of scalar loads that we need to perform
20950  // in order to load our vector from memory.
20951  unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
20952  if (Ext == ISD::SEXTLOAD && NumLoads > 1)
20953  return SDValue();
20954 
// For a 256-bit SEXTLOAD only half the register is loaded from
// memory; the rest is produced by the sign extension.
20955  unsigned loadRegZize = RegSz;
20956  if (Ext == ISD::SEXTLOAD && RegSz == 256)
20957  loadRegZize /= 2;
20958 
20959  // Represent our vector as a sequence of elements which are the
20960  // largest scalar that we can load.
20961  EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy,
20962  loadRegZize/SclrLoadTy.getSizeInBits());
20963 
20964  // Represent the data using the same element type that is stored in
20965  // memory. In practice, we ''widen'' MemVT.
20966  EVT WideVecVT =
20967  EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
20968  loadRegZize/MemVT.getScalarType().getSizeInBits());
20969 
20970  assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
20971  "Invalid vector type");
20972 
20973  // We can't shuffle using an illegal type.
20974  if (!TLI.isTypeLegal(WideVecVT))
20975  return SDValue();
20976 
20977  SmallVector<SDValue, 8> Chains;
20978  SDValue Ptr = Ld->getBasePtr();
20979  SDValue Increment = DAG.getConstant(SclrLoadTy.getSizeInBits()/8,
20980  TLI.getPointerTy());
20981  SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
20982 
// Emit the scalar loads and pack them into a vector, advancing the
// pointer by the scalar size after each load.
20983  for (unsigned i = 0; i < NumLoads; ++i) {
20984  // Perform a single load.
20985  SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(),
20986  Ptr, Ld->getPointerInfo(),
20987  Ld->isVolatile(), Ld->isNonTemporal(),
20988  Ld->isInvariant(), Ld->getAlignment());
20989  Chains.push_back(ScalarLoad.getValue(1));
20990  // Create the first element type using SCALAR_TO_VECTOR in order to avoid
20991  // another round of DAGCombining.
20992  if (i == 0)
20993  Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad);
20994  else
20995  Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res,
20996  ScalarLoad, DAG.getIntPtrConstant(i));
20997 
20998  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
20999  }
21000 
21001  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
21002 
21003  // Bitcast the loaded value to a vector of the original element type, in
21004  // the size of the target vector type.
21005  SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res);
21006  unsigned SizeRatio = RegSz/MemSz;
21007 
21008  if (Ext == ISD::SEXTLOAD) {
21009  // If we have SSE4.1 we can directly emit a VSEXT node.
21010  if (Subtarget->hasSSE41()) {
21011  SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec);
21012  return DCI.CombineTo(N, Sext, TF, true);
21013  }
21014 
21015  // Otherwise we'll shuffle the small elements in the high bits of the
21016  // larger type and perform an arithmetic shift. If the shift is not legal
21017  // it's better to scalarize.
21018  if (!TLI.isOperationLegalOrCustom(ISD::SRA, RegVT))
21019  return SDValue();
21020 
21021  // Redistribute the loaded elements into the different locations.
// Place each narrow element in the TOP slot of its wide lane so the
// arithmetic right shift below performs the sign extension.
21022  SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
21023  for (unsigned i = 0; i != NumElems; ++i)
21024  ShuffleVec[i*SizeRatio + SizeRatio-1] = i;
21025 
21026  SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
21027  DAG.getUNDEF(WideVecVT),
21028  &ShuffleVec[0]);
21029 
21030  Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
21031 
21032  // Build the arithmetic shift.
// NOTE(review): the extraction dropped original line 21034; the
// subtrahend is presumably MemVT.getVectorElementType().getSizeInBits()
// (wide element bits minus narrow element bits). Confirm against the
// original source.
21033  unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
21035  Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff,
21036  DAG.getConstant(Amt, RegVT));
21037 
21038  return DCI.CombineTo(N, Shuff, TF, true);
21039  }
21040 
21041  // Redistribute the loaded elements into the different locations.
// EXTLOAD case: put each element in the LOW slot of its wide lane and
// leave the high parts undef.
21042  SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
21043  for (unsigned i = 0; i != NumElems; ++i)
21044  ShuffleVec[i*SizeRatio] = i;
21045 
21046  SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
21047  DAG.getUNDEF(WideVecVT),
21048  &ShuffleVec[0]);
21049 
21050  // Bitcast to the requested type.
21051  Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
21052  // Replace the original load with the new sequence
21053  // and return the new chain.
21054  return DCI.CombineTo(N, Shuff, TF, true);
21055  }
21056 
21057  return SDValue();
21058 }
SDValue getValue(unsigned R) const
LLVMContext * getContext() const
Definition: SelectionDAG.h:280
unsigned getSizeInBits() const
bool hasSSE41() const
Definition: X86Subtarget.h:315
const SDValue & getBasePtr() const
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:116
static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
EVT getValueType(unsigned ResNo) const
EVT getScalarType() const
Definition: ValueTypes.h:211
virtual MVT getPointerTy(uint32_t=0) const
EVT getVectorElementType() const
Definition: ValueTypes.h:217
SDValue CombineTo(SDNode *N, const std::vector< SDValue > &To, bool AddTo=true)
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
EVT getMemoryVT() const
getMemoryVT - Return the type of the in-memory value.
bool hasSSE2() const
Definition: X86Subtarget.h:312
bool isTypeLegal(EVT VT) const
assert(Globals.size() > 1)
bool isNonTemporal() const
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
bool isVolatile() const
const MachinePointerInfo & getPointerInfo() const
bool isInvariant() const
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
const SDValue & getChain() const
ISD::LoadExtType getExtensionType() const
bool hasInt256() const
Definition: X86Subtarget.h:321
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo=nullptr, const MDNode *Ranges=nullptr)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
#define N
EVT getValueType() const
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
bool isPowerOf2_32(uint32_t Value)
Definition: MathExtras.h:363
unsigned getAlignment() const
static SDValue PerformMulCombine ( SDNode N,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI 
)
static

PerformMulCombine - Optimize a single multiply with constant into two in order to implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA.

Definition at line 20226 of file X86ISelLowering.cpp.

// PerformMulCombine - Optimize a single i64 multiply by constant into
// two cheaper operations (LEA + SHL, or LEA + LEA) when the constant
// factors as (3|5|9) * power-of-two or (3|5|9) * (3|5|9). Returns
// SDValue() always; the replacement, if any, is installed via
// DCI.CombineTo.
20227  {
20228  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
20229  return SDValue();
20230 
20231  EVT VT = N->getValueType(0);
20232  if (VT != MVT::i64)
20233  return SDValue();
20234 
// NOTE(review): the extraction dropped original line 20235, which
// declares 'C' — presumably
//   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
// Confirm against the original source.
20236  if (!C)
20237  return SDValue();
20238  uint64_t MulAmt = C->getZExtValue();
// Powers of two and 3/5/9 are already single-instruction (SHL / LEA);
// nothing to gain from splitting them.
20239  if (isPowerOf2_64(MulAmt) || MulAmt == 3 || MulAmt == 5 || MulAmt == 9)
20240  return SDValue();
20241 
20242  uint64_t MulAmt1 = 0;
20243  uint64_t MulAmt2 = 0;
// Try to factor out 9, 5, or 3 (largest first, so the LEA covers as
// much of the constant as possible).
20244  if ((MulAmt % 9) == 0) {
20245  MulAmt1 = 9;
20246  MulAmt2 = MulAmt / 9;
20247  } else if ((MulAmt % 5) == 0) {
20248  MulAmt1 = 5;
20249  MulAmt2 = MulAmt / 5;
20250  } else if ((MulAmt % 3) == 0) {
20251  MulAmt1 = 3;
20252  MulAmt2 = MulAmt / 3;
20253  }
// Only profitable when the cofactor is itself a single LEA/SHL amount.
20254  if (MulAmt2 &&
20255  (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){
20256  SDLoc DL(N);
20257 
20258  if (isPowerOf2_64(MulAmt2) &&
20259  !(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
20260  // If second multiplier is pow2, issue it first. We want the multiply by
20261  // 3, 5, or 9 to be folded into the addressing mode unless the lone use
20262  // is an add.
20263  std::swap(MulAmt1, MulAmt2);
20264 
20265  SDValue NewMul;
20266  if (isPowerOf2_64(MulAmt1))
20267  NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
20268  DAG.getConstant(Log2_64(MulAmt1), MVT::i8));
20269  else
20270  NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
20271  DAG.getConstant(MulAmt1, VT));
20272 
20273  if (isPowerOf2_64(MulAmt2))
20274  NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
20275  DAG.getConstant(Log2_64(MulAmt2), MVT::i8));
20276  else
20277  NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
20278  DAG.getConstant(MulAmt2, VT));
20279 
20280  // Do not add new nodes to DAG combiner worklist.
20281  DCI.CombineTo(N, NewMul, false);
20282  }
20283  return SDValue();
20284 }
bool hasOneUse() const
unsigned getOpcode() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
SDValue CombineTo(SDNode *N, const std::vector< SDValue > &To, bool AddTo=true)
use_iterator use_begin() const
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:590
bool isPowerOf2_64(uint64_t Value)
Definition: MathExtras.h:369
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:265
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
unsigned Log2_64(uint64_t Value)
Definition: MathExtras.h:458
uint64_t getZExtValue() const
static SDValue PerformOrCombine ( SDNode N,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI,
const X86Subtarget Subtarget 
)
static

Definition at line 20654 of file X86ISelLowering.cpp.

// PerformOrCombine - Target-specific combines on ISD::OR:
//   (1) delegate CMPEQ-style folds to CMPEQCombine,
//   (2) recognize the (or (and m, y), (andnp m, x)) vector-select
//       pattern and lower it to PSIGN or (V)PBLENDVB, and
//   (3) fold (or (shl x, c), (srl y, width-c)) into SHLD/SHRD.
// Returns SDValue() when nothing matched.
20656  {
20657  if (DCI.isBeforeLegalizeOps())
20658  return SDValue();
20659 
20660  SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
20661  if (R.getNode())
20662  return R;
20663 
20664  SDValue N0 = N->getOperand(0);
20665  SDValue N1 = N->getOperand(1);
20666  EVT VT = N->getValueType(0);
20667 
20668  // look for psign/blend
20669  if (VT == MVT::v2i64 || VT == MVT::v4i64) {
20670  if (!Subtarget->hasSSSE3() ||
20671  (VT == MVT::v4i64 && !Subtarget->hasInt256()))
20672  return SDValue();
20673 
20674  // Canonicalize pandn to RHS
20675  if (N0.getOpcode() == X86ISD::ANDNP)
20676  std::swap(N0, N1);
20677  // or (and (m, y), (pandn m, x))
20678  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) {
20679  SDValue Mask = N1.getOperand(0);
20680  SDValue X = N1.getOperand(1);
20681  SDValue Y;
// AND is commutative, so the mask may be either operand of N0.
20682  if (N0.getOperand(0) == Mask)
20683  Y = N0.getOperand(1);
20684  if (N0.getOperand(1) == Mask)
20685  Y = N0.getOperand(0);
20686 
20687  // Check to see if the mask appeared in both the AND and ANDNP;
// if it did not, Y was never set and the pattern does not match.
20688  if (!Y.getNode())
20689  return SDValue();
20690 
20691  // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them.
20692  // Look through mask bitcast.
20693  if (Mask.getOpcode() == ISD::BITCAST)
20694  Mask = Mask.getOperand(0);
20695  if (X.getOpcode() == ISD::BITCAST)
20696  X = X.getOperand(0);
20697  if (Y.getOpcode() == ISD::BITCAST)
20698  Y = Y.getOperand(0);
20699 
20700  EVT MaskVT = Mask.getValueType();
20701 
20702  // Validate that the Mask operand is a vector sra node.
20703  // FIXME: what to do for bytes, since there is a psignb/pblendvb, but
20704  // there is no psrai.b
20705  unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
20706  unsigned SraAmt = ~0;
20707  if (Mask.getOpcode() == ISD::SRA) {
20708  if (auto *AmtBV = dyn_cast<BuildVectorSDNode>(Mask.getOperand(1)))
20709  if (auto *AmtConst = AmtBV->getConstantSplatNode())
20710  SraAmt = AmtConst->getZExtValue();
20711  } else if (Mask.getOpcode() == X86ISD::VSRAI) {
20712  SDValue SraC = Mask.getOperand(1);
20713  SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
20714  }
// The SRA must replicate the sign bit across the element, i.e. shift
// by element-size - 1; otherwise the mask is not all-ones/all-zeros.
20715  if ((SraAmt + 1) != EltBits)
20716  return SDValue();
20717 
20718  SDLoc DL(N);
20719 
20720  // Now we know we at least have a pblendvb with the mask val. See if
20721  // we can form a psignb/w/d.
20722  // psign = x.type == y.type == mask.type && y = sub(0, x);
// NOTE(review): the extraction dropped original line 20724; it is
// presumably the all-zeros check on the SUB's first operand, e.g.
//   ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
// Confirm against the original source.
20723  if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
20725  X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
20726  assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
20727  "Unsupported VT for PSIGN");
20728  Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0));
20729  return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
20730  }
20731  // PBLENDVB only available on SSE 4.1
20732  if (!Subtarget->hasSSE41())
20733  return SDValue();
20734 
20735  EVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
20736 
// PBLENDVB operates on bytes, so bitcast everything to a byte vector,
// emit the VSELECT, and bitcast the result back.
20737  X = DAG.getNode(ISD::BITCAST, DL, BlendVT, X);
20738  Y = DAG.getNode(ISD::BITCAST, DL, BlendVT, Y);
20739  Mask = DAG.getNode(ISD::BITCAST, DL, BlendVT, Mask);
20740  Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X);
20741  return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
20742  }
20743  }
20744 
20745  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
20746  return SDValue();
20747 
20748  // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
20749  MachineFunction &MF = DAG.getMachineFunction();
20750  bool OptForSize = MF.getFunction()->getAttributes().
20751  hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
20752 
20753  // SHLD/SHRD instructions have lower register pressure, but on some
20754  // platforms they have higher latency than the equivalent
20755  // series of shifts/or that would otherwise be generated.
20756  // Don't fold (or (x << c) | (y >> (64 - c))) if SHLD/SHRD instructions
20757  // have higher latencies and we are not optimizing for size.
20758  if (!OptForSize && Subtarget->isSHLDSlow())
20759  return SDValue();
20760 
// Canonicalize the SHL operand into N0.
20761  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
20762  std::swap(N0, N1);
20763  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
20764  return SDValue();
20765  if (!N0.hasOneUse() || !N1.hasOneUse())
20766  return SDValue();
20767 
20768  SDValue ShAmt0 = N0.getOperand(1);
20769  if (ShAmt0.getValueType() != MVT::i8)
20770  return SDValue();
20771  SDValue ShAmt1 = N1.getOperand(1);
20772  if (ShAmt1.getValueType() != MVT::i8)
20773  return SDValue();
// Look through truncates of the shift amounts.
20774  if (ShAmt0.getOpcode() == ISD::TRUNCATE)
20775  ShAmt0 = ShAmt0.getOperand(0);
20776  if (ShAmt1.getOpcode() == ISD::TRUNCATE)
20777  ShAmt1 = ShAmt1.getOperand(0);
20778 
20779  SDLoc DL(N);
20780  unsigned Opc = X86ISD::SHLD;
20781  SDValue Op0 = N0.getOperand(0);
20782  SDValue Op1 = N1.getOperand(0);
// If the SHL amount is the (width - c) side, this is really a SHRD.
20783  if (ShAmt0.getOpcode() == ISD::SUB) {
20784  Opc = X86ISD::SHRD;
20785  std::swap(Op0, Op1);
20786  std::swap(ShAmt0, ShAmt1);
20787  }
20788 
20789  unsigned Bits = VT.getSizeInBits();
// Case 1: the complementary amount is (Bits - ShAmt0).
20790  if (ShAmt1.getOpcode() == ISD::SUB) {
20791  SDValue Sum = ShAmt1.getOperand(0);
20792  if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
20793  SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
20794  if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE)
20795  ShAmt1Op1 = ShAmt1Op1.getOperand(0);
20796  if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
20797  return DAG.getNode(Opc, DL, VT,
20798  Op0, Op1,
20799  DAG.getNode(ISD::TRUNCATE, DL,
20800  MVT::i8, ShAmt0));
20801  }
// Case 2: both amounts are constants summing to the bit width.
20802  } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
20803  ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
20804  if (ShAmt0C &&
20805  ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == Bits)
20806  return DAG.getNode(Opc, DL, VT,
20807  N0.getOperand(0), N1.getOperand(0),
20808  DAG.getNode(ISD::TRUNCATE, DL,
20809  MVT::i8, ShAmt0));
20810  }
20811 
20812  return SDValue();
20813 }
PSIGN - Copy integer sign.
bool hasOneUse() const
unsigned getOpcode() const
bool hasSSE41() const
Definition: X86Subtarget.h:315
const SDValue & getOperand(unsigned Num) const
const Function * getFunction() const
bool isBuildVectorAllZeros(const SDNode *N)
EVT getValueType(unsigned ResNo) const
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:276
EVT getVectorElementType() const
Definition: ValueTypes.h:217
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
bool hasSSSE3() const
Definition: X86Subtarget.h:314
SDNode * getNode() const
get the SDNode which holds the desired result
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang","erlang-compatible garbage collector")
unsigned getOpcode() const
static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
bool isSHLDSlow() const
Definition: X86Subtarget.h:347
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:590
AttributeSet getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:170
int64_t getSExtValue() const
bool hasInt256() const
Definition: X86Subtarget.h:321
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:265
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
ANDNP - Bitwise Logical AND NOT of Packed FP values.
EVT getValueType() const
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml","ocaml 3.10-compatible collector")
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:368
static SDValue PerformSELECTCombine ( SDNode N,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI,
const X86Subtarget Subtarget 
)
static

PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT nodes.

Definition at line 19252 of file X86ISelLowering.cpp.

19254  {
19255  SDLoc DL(N);
19256  SDValue Cond = N->getOperand(0);
19257  // Get the LHS/RHS of the select.
19258  SDValue LHS = N->getOperand(1);
19259  SDValue RHS = N->getOperand(2);
19260  EVT VT = LHS.getValueType();
19261  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19262 
19263  // If we have SSE[12] support, try to form min/max nodes. SSE min/max
19264  // instructions match the semantics of the common C idiom x<y?x:y but not
19265  // x<=y?x:y, because of how they handle negative zero (which can be
19266  // ignored in unsafe-math mode).
19267  if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
19268  VT != MVT::f80 && TLI.isTypeLegal(VT) &&
19269  (Subtarget->hasSSE2() ||
19270  (Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
19271  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
19272 
19273  unsigned Opcode = 0;
19274  // Check for x CC y ? x : y.
19275  if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
19276  DAG.isEqualTo(RHS, Cond.getOperand(1))) {
19277  switch (CC) {
19278  default: break;
19279  case ISD::SETULT:
19280  // Converting this to a min would handle NaNs incorrectly, and swapping
19281  // the operands would cause it to handle comparisons between positive
19282  // and negative zero incorrectly.
19283  if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
19284  if (!DAG.getTarget().Options.UnsafeFPMath &&
19285  !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
19286  break;
19287  std::swap(LHS, RHS);
19288  }
19289  Opcode = X86ISD::FMIN;
19290  break;
19291  case ISD::SETOLE:
19292  // Converting this to a min would handle comparisons between positive
19293  // and negative zero incorrectly.
19294  if (!DAG.getTarget().Options.UnsafeFPMath &&
19295  !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
19296  break;
19297  Opcode = X86ISD::FMIN;
19298  break;
19299  case ISD::SETULE:
19300  // Converting this to a min would handle both negative zeros and NaNs
19301  // incorrectly, but we can swap the operands to fix both.
19302  std::swap(LHS, RHS);
19303  case ISD::SETOLT:
19304  case ISD::SETLT:
19305  case ISD::SETLE:
19306  Opcode = X86ISD::FMIN;
19307  break;
19308 
19309  case ISD::SETOGE:
19310  // Converting this to a max would handle comparisons between positive
19311  // and negative zero incorrectly.
19312  if (!DAG.getTarget().Options.UnsafeFPMath &&
19313  !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
19314  break;
19315  Opcode = X86ISD::FMAX;
19316  break;
19317  case ISD::SETUGT:
19318  // Converting this to a max would handle NaNs incorrectly, and swapping
19319  // the operands would cause it to handle comparisons between positive
19320  // and negative zero incorrectly.
19321  if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
19322  if (!DAG.getTarget().Options.UnsafeFPMath &&
19323  !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
19324  break;
19325  std::swap(LHS, RHS);
19326  }
19327  Opcode = X86ISD::FMAX;
19328  break;
19329  case ISD::SETUGE:
19330  // Converting this to a max would handle both negative zeros and NaNs
19331  // incorrectly, but we can swap the operands to fix both.
19332  std::swap(LHS, RHS);
19333  case ISD::SETOGT:
19334  case ISD::SETGT:
19335  case ISD::SETGE:
19336  Opcode = X86ISD::FMAX;
19337  break;
19338  }
19339  // Check for x CC y ? y : x -- a min/max with reversed arms.
19340  } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
19341  DAG.isEqualTo(RHS, Cond.getOperand(0))) {
19342  switch (CC) {
19343  default: break;
19344  case ISD::SETOGE:
19345  // Converting this to a min would handle comparisons between positive
19346  // and negative zero incorrectly, and swapping the operands would
19347  // cause it to handle NaNs incorrectly.
19348  if (!DAG.getTarget().Options.UnsafeFPMath &&
19349  !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
19350  if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
19351  break;
19352  std::swap(LHS, RHS);
19353  }
19354  Opcode = X86ISD::FMIN;
19355  break;
19356  case ISD::SETUGT:
19357  // Converting this to a min would handle NaNs incorrectly.
19358  if (!DAG.getTarget().Options.UnsafeFPMath &&
19359  (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
19360  break;
19361  Opcode = X86ISD::FMIN;
19362  break;
19363  case ISD::SETUGE:
19364  // Converting this to a min would handle both negative zeros and NaNs
19365  // incorrectly, but we can swap the operands to fix both.
19366  std::swap(LHS, RHS);
19367  case ISD::SETOGT:
19368  case ISD::SETGT:
19369  case ISD::SETGE:
19370  Opcode = X86ISD::FMIN;
19371  break;
19372 
19373  case ISD::SETULT:
19374  // Converting this to a max would handle NaNs incorrectly.
19375  if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
19376  break;
19377  Opcode = X86ISD::FMAX;
19378  break;
19379  case ISD::SETOLE:
19380  // Converting this to a max would handle comparisons between positive
19381  // and negative zero incorrectly, and swapping the operands would
19382  // cause it to handle NaNs incorrectly.
19383  if (!DAG.getTarget().Options.UnsafeFPMath &&
19384  !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
19385  if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
19386  break;
19387  std::swap(LHS, RHS);
19388  }
19389  Opcode = X86ISD::FMAX;
19390  break;
19391  case ISD::SETULE:
19392  // Converting this to a max would handle both negative zeros and NaNs
19393  // incorrectly, but we can swap the operands to fix both.
19394  std::swap(LHS, RHS);
19395  case ISD::SETOLT:
19396  case ISD::SETLT:
19397  case ISD::SETLE:
19398  Opcode = X86ISD::FMAX;
19399  break;
19400  }
19401  }
19402 
19403  if (Opcode)
19404  return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
19405  }
19406 
19407  EVT CondVT = Cond.getValueType();
19408  if (Subtarget->hasAVX512() && VT.isVector() && CondVT.isVector() &&
19409  CondVT.getVectorElementType() == MVT::i1) {
19410  // v16i8 (select v16i1, v16i8, v16i8) does not have a proper
19411  // lowering on AVX-512. In this case we convert it to
19412  // v16i8 (select v16i8, v16i8, v16i8) and use AVX instruction.
19413  // The same situation for all 128 and 256-bit vectors of i8 and i16
19414  EVT OpVT = LHS.getValueType();
19415  if ((OpVT.is128BitVector() || OpVT.is256BitVector()) &&
19416  (OpVT.getVectorElementType() == MVT::i8 ||
19417  OpVT.getVectorElementType() == MVT::i16)) {
19418  Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond);
19419  DCI.AddToWorklist(Cond.getNode());
19420  return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS);
19421  }
19422  }
19423  // If this is a select between two integer constants, try to do some
19424  // optimizations.
19425  if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
19426  if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(RHS))
19427  // Don't do this for crazy integer types.
19428  if (DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType())) {
19429  // If this is efficiently invertible, canonicalize the LHSC/RHSC values
19430  // so that TrueC (the true value) is larger than FalseC.
19431  bool NeedsCondInvert = false;
19432 
19433  if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
19434  // Efficiently invertible.
19435  (Cond.getOpcode() == ISD::SETCC || // setcc -> invertible.
19436  (Cond.getOpcode() == ISD::XOR && // xor(X, C) -> invertible.
19437  isa<ConstantSDNode>(Cond.getOperand(1))))) {
19438  NeedsCondInvert = true;
19439  std::swap(TrueC, FalseC);
19440  }
19441 
19442  // Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0.
19443  if (FalseC->getAPIntValue() == 0 &&
19444  TrueC->getAPIntValue().isPowerOf2()) {
19445  if (NeedsCondInvert) // Invert the condition if needed.
19446  Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
19447  DAG.getConstant(1, Cond.getValueType()));
19448 
19449  // Zero extend the condition if needed.
19450  Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
19451 
19452  unsigned ShAmt = TrueC->getAPIntValue().logBase2();
19453  return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
19454  DAG.getConstant(ShAmt, MVT::i8));
19455  }
19456 
19457  // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst.
19458  if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
19459  if (NeedsCondInvert) // Invert the condition if needed.
19460  Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
19461  DAG.getConstant(1, Cond.getValueType()));
19462 
19463  // Zero extend the condition if needed.
19464  Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
19465  FalseC->getValueType(0), Cond);
19466  return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
19467  SDValue(FalseC, 0));
19468  }
19469 
19470  // Optimize cases that will turn into an LEA instruction. This requires
19471  // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
19472  if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
19473  uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
19474  if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
19475 
19476  bool isFastMultiplier = false;
19477  if (Diff < 10) {
19478  switch ((unsigned char)Diff) {
19479  default: break;
19480  case 1: // result = add base, cond
19481  case 2: // result = lea base( , cond*2)
19482  case 3: // result = lea base(cond, cond*2)
19483  case 4: // result = lea base( , cond*4)
19484  case 5: // result = lea base(cond, cond*4)
19485  case 8: // result = lea base( , cond*8)
19486  case 9: // result = lea base(cond, cond*8)
19487  isFastMultiplier = true;
19488  break;
19489  }
19490  }
19491 
19492  if (isFastMultiplier) {
19493  APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
19494  if (NeedsCondInvert) // Invert the condition if needed.
19495  Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
19496  DAG.getConstant(1, Cond.getValueType()));
19497 
19498  // Zero extend the condition if needed.
19499  Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
19500  Cond);
19501  // Scale the condition by the difference.
19502  if (Diff != 1)
19503  Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
19504  DAG.getConstant(Diff, Cond.getValueType()));
19505 
19506  // Add the base if non-zero.
19507  if (FalseC->getAPIntValue() != 0)
19508  Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
19509  SDValue(FalseC, 0));
19510  return Cond;
19511  }
19512  }
19513  }
19514  }
19515 
19516  // Canonicalize max and min:
19517  // (x > y) ? x : y -> (x >= y) ? x : y
19518  // (x < y) ? x : y -> (x <= y) ? x : y
19519  // This allows use of COND_S / COND_NS (see TranslateX86CC) which eliminates
19520  // the need for an extra compare
19521  // against zero. e.g.
19522  // (x - y) > 0 : (x - y) ? 0 -> (x - y) >= 0 : (x - y) ? 0
19523  // subl %esi, %edi
19524  // testl %edi, %edi
19525  // movl $0, %eax
19526  // cmovgl %edi, %eax
19527  // =>
19528  // xorl %eax, %eax
19529  // subl %esi, $edi
19530  // cmovsl %eax, %edi
19531  if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC &&
19532  DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
19533  DAG.isEqualTo(RHS, Cond.getOperand(1))) {
19534  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
19535  switch (CC) {
19536  default: break;
19537  case ISD::SETLT:
19538  case ISD::SETGT: {
19539  ISD::CondCode NewCC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGE;
19540  Cond = DAG.getSetCC(SDLoc(Cond), Cond.getValueType(),
19541  Cond.getOperand(0), Cond.getOperand(1), NewCC);
19542  return DAG.getNode(ISD::SELECT, DL, VT, Cond, LHS, RHS);
19543  }
19544  }
19545  }
19546 
19547  // Early exit check
19548  if (!TLI.isTypeLegal(VT))
19549  return SDValue();
19550 
19551  // Match VSELECTs into subs with unsigned saturation.
19552  if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
19553  // psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
19554  ((Subtarget->hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
19555  (Subtarget->hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
19556  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
19557 
19558  // Check if one of the arms of the VSELECT is a zero vector. If it's on the
19559  // left side invert the predicate to simplify logic below.
19560  SDValue Other;
19561  if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
19562  Other = RHS;
19563  CC = ISD::getSetCCInverse(CC, true);
19564  } else if (ISD::isBuildVectorAllZeros(RHS.getNode())) {
19565  Other = LHS;
19566  }
19567 
19568  if (Other.getNode() && Other->getNumOperands() == 2 &&
19569  DAG.isEqualTo(Other->getOperand(0), Cond.getOperand(0))) {
19570  SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1);
19571  SDValue CondRHS = Cond->getOperand(1);
19572 
19573  // Look for a general sub with unsigned saturation first.
19574  // x >= y ? x-y : 0 --> subus x, y
19575  // x > y ? x-y : 0 --> subus x, y
19576  if ((CC == ISD::SETUGE || CC == ISD::SETUGT) &&
19577  Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS))
19578  return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
19579 
19580  if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS))
19581  if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
19582  if (auto *CondRHSBV = dyn_cast<BuildVectorSDNode>(CondRHS))
19583  if (auto *CondRHSConst = CondRHSBV->getConstantSplatNode())
19584  // If the RHS is a constant we have to reverse the const
19585  // canonicalization.
19586  // x > C-1 ? x+-C : 0 --> subus x, C
19587  if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
19588  CondRHSConst->getAPIntValue() ==
19589  (-OpRHSConst->getAPIntValue() - 1))
19590  return DAG.getNode(
19591  X86ISD::SUBUS, DL, VT, OpLHS,
19592  DAG.getConstant(-OpRHSConst->getAPIntValue(), VT));
19593 
19594  // Another special case: If C was a sign bit, the sub has been
19595  // canonicalized into a xor.
19596  // FIXME: Would it be better to use computeKnownBits to determine
19597  // whether it's safe to decanonicalize the xor?
19598  // x s< 0 ? x^C : 0 --> subus x, C
19599  if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
19600  ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
19601  OpRHSConst->getAPIntValue().isSignBit())
19602  // Note that we have to rebuild the RHS constant here to ensure we
19603  // don't rely on particular values of undef lanes.
19604  return DAG.getNode(
19605  X86ISD::SUBUS, DL, VT, OpLHS,
19606  DAG.getConstant(OpRHSConst->getAPIntValue(), VT));
19607  }
19608  }
19609  }
19610 
19611  // Try to match a min/max vector operation.
19612  if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC) {
19613  std::pair<unsigned, bool> ret = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget);
19614  unsigned Opc = ret.first;
19615  bool NeedSplit = ret.second;
19616 
19617  if (Opc && NeedSplit) {
19618  unsigned NumElems = VT.getVectorNumElements();
19619  // Extract the LHS vectors
19620  SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, DL);
19621  SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, DL);
19622 
19623  // Extract the RHS vectors
19624  SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, DL);
19625  SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, DL);
19626 
19627  // Create min/max for each subvector
19628  LHS = DAG.getNode(Opc, DL, LHS1.getValueType(), LHS1, RHS1);
19629  RHS = DAG.getNode(Opc, DL, LHS2.getValueType(), LHS2, RHS2);
19630 
19631  // Merge the result
19632  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LHS, RHS);
19633  } else if (Opc)
19634  return DAG.getNode(Opc, DL, VT, LHS, RHS);
19635  }
19636 
19637  // Simplify vector selection if the selector will be produced by CMPP*/PCMP*.
19638  if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
19639  // Check if SETCC has already been promoted
19640  TLI.getSetCCResultType(*DAG.getContext(), VT) == CondVT &&
19641  // Check that condition value type matches vselect operand type
19642  CondVT == VT) {
19643 
19644  assert(Cond.getValueType().isVector() &&
19645  "vector select expects a vector selector!");
19646 
19647  bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
19648  bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
19649 
19650  if (!TValIsAllOnes && !FValIsAllZeros) {
19651  // Try invert the condition if true value is not all 1s and false value
19652  // is not all 0s.
19653  bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
19654  bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode());
19655 
19656  if (TValIsAllZeros || FValIsAllOnes) {
19657  SDValue CC = Cond.getOperand(2);
19658  ISD::CondCode NewCC =
19659  ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
19660  Cond.getOperand(0).getValueType().isInteger());
19661  Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1), NewCC);
19662  std::swap(LHS, RHS);
19663  TValIsAllOnes = FValIsAllOnes;
19664  FValIsAllZeros = TValIsAllZeros;
19665  }
19666  }
19667 
19668  if (TValIsAllOnes || FValIsAllZeros) {
19669  SDValue Ret;
19670 
19671  if (TValIsAllOnes && FValIsAllZeros)
19672  Ret = Cond;
19673  else if (TValIsAllOnes)
19674  Ret = DAG.getNode(ISD::OR, DL, CondVT, Cond,
19675  DAG.getNode(ISD::BITCAST, DL, CondVT, RHS));
19676  else if (FValIsAllZeros)
19677  Ret = DAG.getNode(ISD::AND, DL, CondVT, Cond,
19678  DAG.getNode(ISD::BITCAST, DL, CondVT, LHS));
19679 
19680  return DAG.getNode(ISD::BITCAST, DL, VT, Ret);
19681  }
19682  }
19683 
19684  // Try to fold this VSELECT into a MOVSS/MOVSD
19685  if (N->getOpcode() == ISD::VSELECT &&
19686  Cond.getOpcode() == ISD::BUILD_VECTOR && !DCI.isBeforeLegalize()) {
19687  if (VT == MVT::v4i32 || VT == MVT::v4f32 ||
19688  (Subtarget->hasSSE2() && (VT == MVT::v2i64 || VT == MVT::v2f64))) {
19689  bool CanFold = false;
19690  unsigned NumElems = Cond.getNumOperands();
19691  SDValue A = LHS;
19692  SDValue B = RHS;
19693 
19694  if (isZero(Cond.getOperand(0))) {
19695  CanFold = true;
19696 
19697  // fold (vselect <0,-1,-1,-1>, A, B) -> (movss A, B)
19698  // fold (vselect <0,-1> -> (movsd A, B)
19699  for (unsigned i = 1, e = NumElems; i != e && CanFold; ++i)
19700  CanFold = isAllOnes(Cond.getOperand(i));
19701  } else if (isAllOnes(Cond.getOperand(0))) {
19702  CanFold = true;
19703  std::swap(A, B);
19704 
19705  // fold (vselect <-1,0,0,0>, A, B) -> (movss B, A)
19706  // fold (vselect <-1,0> -> (movsd B, A)
19707  for (unsigned i = 1, e = NumElems; i != e && CanFold; ++i)
19708  CanFold = isZero(Cond.getOperand(i));
19709  }
19710 
19711  if (CanFold) {
19712  if (VT == MVT::v4i32 || VT == MVT::v4f32)
19713  return getTargetShuffleNode(X86ISD::MOVSS, DL, VT, A, B, DAG);
19714  return getTargetShuffleNode(X86ISD::MOVSD, DL, VT, A, B, DAG);
19715  }
19716 
19717  if (Subtarget->hasSSE2() && (VT == MVT::v4i32 || VT == MVT::v4f32)) {
19718  // fold (v4i32: vselect <0,0,-1,-1>, A, B) ->
19719  // (v4i32 (bitcast (movsd (v2i64 (bitcast A)),
19720  // (v2i64 (bitcast B)))))
19721  //
19722  // fold (v4f32: vselect <0,0,-1,-1>, A, B) ->
19723  // (v4f32 (bitcast (movsd (v2f64 (bitcast A)),
19724  // (v2f64 (bitcast B)))))
19725  //
19726  // fold (v4i32: vselect <-1,-1,0,0>, A, B) ->
19727  // (v4i32 (bitcast (movsd (v2i64 (bitcast B)),
19728  // (v2i64 (bitcast A)))))
19729  //
19730  // fold (v4f32: vselect <-1,-1,0,0>, A, B) ->
19731  // (v4f32 (bitcast (movsd (v2f64 (bitcast B)),
19732  // (v2f64 (bitcast A)))))
19733 
19734  CanFold = (isZero(Cond.getOperand(0)) &&
19735  isZero(Cond.getOperand(1)) &&
19736  isAllOnes(Cond.getOperand(2)) &&
19737  isAllOnes(Cond.getOperand(3)));
19738 
19739  if (!CanFold && isAllOnes(Cond.getOperand(0)) &&
19740  isAllOnes(Cond.getOperand(1)) &&
19741  isZero(Cond.getOperand(2)) &&
19742  isZero(Cond.getOperand(3))) {
19743  CanFold = true;
19744  std::swap(LHS, RHS);
19745  }
19746 
19747  if (CanFold) {
19748  EVT NVT = (VT == MVT::v4i32) ? MVT::v2i64 : MVT::v2f64;
19749  SDValue NewA = DAG.getNode(ISD::BITCAST, DL, NVT, LHS);
19750  SDValue NewB = DAG.getNode(ISD::BITCAST, DL, NVT, RHS);
 19751  SDValue Select = getTargetShuffleNode(X86ISD::MOVSD, DL, NVT, NewA,
 19752  NewB, DAG);
19753  return DAG.getNode(ISD::BITCAST, DL, VT, Select);
19754  }
19755  }
19756  }
19757  }
19758 
19759  // If we know that this node is legal then we know that it is going to be
19760  // matched by one of the SSE/AVX BLEND instructions. These instructions only
19761  // depend on the highest bit in each word. Try to use SimplifyDemandedBits
19762  // to simplify previous instructions.
19763  if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
19764  !DCI.isBeforeLegalize() &&
19765  // We explicitly check against v8i16 and v16i16 because, although
19766  // they're marked as Custom, they might only be legal when Cond is a
19767  // build_vector of constants. This will be taken care in a later
19768  // condition.
19769  (TLI.isOperationLegalOrCustom(ISD::VSELECT, VT) && VT != MVT::v16i16 &&
19770  VT != MVT::v8i16)) {
19771  unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
19772 
19773  // Don't optimize vector selects that map to mask-registers.
19774  if (BitWidth == 1)
19775  return SDValue();
19776 
19777  // Check all uses of that condition operand to check whether it will be
19778  // consumed by non-BLEND instructions, which may depend on all bits are set
19779  // properly.
19780  for (SDNode::use_iterator I = Cond->use_begin(),
19781  E = Cond->use_end(); I != E; ++I)
19782  if (I->getOpcode() != ISD::VSELECT)
19783  // TODO: Add other opcodes eventually lowered into BLEND.
19784  return SDValue();
19785 
19786  assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
19787  APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
19788 
19789  APInt KnownZero, KnownOne;
 19790  TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
 19791  DCI.isBeforeLegalizeOps());
19792  if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) ||
19793  TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne, TLO))
19794  DCI.CommitTargetLoweringOpt(TLO);
19795  }
19796 
19797  // We should generate an X86ISD::BLENDI from a vselect if its argument
19798  // is a sign_extend_inreg of an any_extend of a BUILD_VECTOR of
19799  // constants. This specific pattern gets generated when we split a
19800  // selector for a 512 bit vector in a machine without AVX512 (but with
19801  // 256-bit vectors), during legalization:
19802  //
19803  // (vselect (sign_extend (any_extend (BUILD_VECTOR)) i1) LHS RHS)
19804  //
19805  // Iff we find this pattern and the build_vectors are built from
19806  // constants, we translate the vselect into a shuffle_vector that we
19807  // know will be matched by LowerVECTOR_SHUFFLEtoBlend.
19808  if (N->getOpcode() == ISD::VSELECT && !DCI.isBeforeLegalize()) {
 19809  SDValue Shuffle = TransformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget);
 19810  if (Shuffle.getNode())
19811  return Shuffle;
19812  }
19813 
19814  return SDValue();
19815 }
LLVMContext * getContext() const
Definition: SelectionDAG.h:280
bool isKnownNeverNaN(SDValue Op) const
isKnownNeverNan - Test whether the given SDValue is known to never be NaN.
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:277
unsigned getOpcode() const
static bool isZero(SDValue V)
isZero - Returns true if Elt is a constant integer zero
static bool isAllOnes(SDValue V)
unsigned getNumOperands() const
unsigned getNumOperands() const
const SDValue & getOperand(unsigned Num) const
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, APInt &KnownZero, APInt &KnownOne, TargetLoweringOpt &TLO, unsigned Depth=0) const
bool isKnownNeverZero(SDValue Op) const
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:116
static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, SDValue V1, SelectionDAG &DAG)
bool isBuildVectorAllZeros(const SDNode *N)
EVT getValueType(unsigned ResNo) const
virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const
EVT getScalarType() const
Definition: ValueTypes.h:211
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
Definition: ValueTypes.h:111
EVT getVectorElementType() const
Definition: ValueTypes.h:217
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
bool hasSSE2() const
Definition: X86Subtarget.h:312
SDNode * getNode() const
get the SDNode which holds the desired result
bool isTypeLegal(EVT VT) const
assert(Globals.size() > 1)
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:141
const SDValue & getOperand(unsigned i) const
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
bool hasAVX2() const
Definition: X86Subtarget.h:318
unsigned getOpcode() const
static SDValue TransformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget)
bool isEqualTo(SDValue A, SDValue B) const
Definition: test.h:1
bool isBuildVectorAllOnes(const SDNode *N)
Node predicates.
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
***NAME is the name of the raw_ostream unsigned & i1
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:590
Class for arbitrary precision integers.
Definition: APInt.h:75
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:362
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
bool hasSSE1() const
Definition: X86Subtarget.h:311
static std::pair< unsigned, bool > matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const X86Subtarget *Subtarget)
Matches a VSELECT onto min/max or return 0 if the node doesn't match.
void Shuffle(internal::Random *random, std::vector< E > *v)
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
#define I(x, y, z)
Definition: MD5.cpp:54
bool hasAVX512() const
Definition: X86Subtarget.h:319
EVT getValueType() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:136
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
Definition: ValueTypes.h:106
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
SDValue getSetCC(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Definition: SelectionDAG.h:690
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
unsigned getVectorNumElements() const
Definition: ValueTypes.h:226
static SDValue PerformSETCCCombine ( SDNode N,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI,
const X86Subtarget Subtarget 
)
static

Definition at line 21782 of file X86ISelLowering.cpp.

21784  {
21785  SDLoc DL(N);
 21786  X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));
 21787  SDValue EFLAGS = N->getOperand(1);
21788 
21789  if (CC == X86::COND_A) {
21790  // Try to convert COND_A into COND_B in an attempt to facilitate
21791  // materializing "setb reg".
21792  //
21793  // Do not flip "e > c", where "c" is a constant, because Cmp instruction
21794  // cannot take an immediate as its first operand.
21795  //
21796  if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
21797  EFLAGS.getValueType().isInteger() &&
21798  !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
21799  SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS),
21800  EFLAGS.getNode()->getVTList(),
21801  EFLAGS.getOperand(1), EFLAGS.getOperand(0));
21802  SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
21803  return MaterializeSETB(DL, NewEFLAGS, DAG, N->getSimpleValueType(0));
21804  }
21805  }
21806 
21807  // Materialize "setb reg" as "sbb reg,reg", since it can be extended without
21808  // a zext and produces an all-ones bit which is more useful than 0/1 in some
21809  // cases.
21810  if (CC == X86::COND_B)
21811  return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0));
21812 
21813  SDValue Flags;
21814 
21815  Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
21816  if (Flags.getNode()) {
21817  SDValue Cond = DAG.getConstant(CC, MVT::i8);
21818  return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
21819  }
21820 
21821  return SDValue();
21822 }
SDVTList getVTList() const
static SDValue MaterializeSETB(SDLoc DL, SDValue EFLAGS, SelectionDAG &DAG, MVT VT)
const SDValue & getOperand(unsigned Num) const
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
SDNode * getNode() const
get the SDNode which holds the desired result
static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC)
uint64_t getConstantOperandVal(unsigned Num) const
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
MVT getSimpleValueType(unsigned ResNo) const
static SDValue PerformSExtCombine ( SDNode N,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI,
const X86Subtarget Subtarget 
)
static

Definition at line 21576 of file X86ISelLowering.cpp.

21578  {
21579  if (!DCI.isBeforeLegalizeOps())
21580  return SDValue();
21581 
21582  if (!Subtarget->hasFp256())
21583  return SDValue();
21584 
21585  EVT VT = N->getValueType(0);
21586  if (VT.isVector() && VT.getSizeInBits() == 256) {
21587  SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
21588  if (R.getNode())
21589  return R;
21590  }
21591 
21592  return SDValue();
21593 }
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:116
EVT getValueType(unsigned ResNo) const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasFp256() const
Definition: X86Subtarget.h:320
static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
static SDValue PerformShiftCombine ( SDNode N,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI,
const X86Subtarget Subtarget 
)
static

PerformShiftCombine - Combine shifts.

Definition at line 20359 of file X86ISelLowering.cpp.

20361  {
20362  if (N->getOpcode() == ISD::SHL) {
20363  SDValue V = PerformSHLCombine(N, DAG);
20364  if (V.getNode()) return V;
20365  }
20366 
20367  if (N->getOpcode() != ISD::SRA) {
20368  // Try to fold this logical shift into a zero vector.
20369  SDValue V = performShiftToAllZeros(N, DAG, Subtarget);
20370  if (V.getNode()) return V;
20371  }
20372 
20373  return SDValue();
20374 }
static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget)
Returns a vector of 0s if the node in input is a vector logical shift by a constant amount which is k...
unsigned getOpcode() const
SDNode * getNode() const
get the SDNode which holds the desired result
static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performShiftToAllZeros ( SDNode N,
SelectionDAG DAG,
const X86Subtarget Subtarget 
)
static

Returns a vector of 0s if the node in input is a vector logical shift by a constant amount which is known to be bigger than or equal to the vector element size in bits.

Definition at line 20331 of file X86ISelLowering.cpp.

20332  {
20333  EVT VT = N->getValueType(0);
20334 
20335  if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
20336  (!Subtarget->hasInt256() ||
20337  (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16)))
20338  return SDValue();
20339 
20340  SDValue Amt = N->getOperand(1);
20341  SDLoc DL(N);
20342  if (auto *AmtBV = dyn_cast<BuildVectorSDNode>(Amt))
20343  if (auto *AmtSplat = AmtBV->getConstantSplatNode()) {
20344  APInt ShiftAmt = AmtSplat->getAPIntValue();
20345  unsigned MaxAmount = VT.getVectorElementType().getSizeInBits();
20346 
20347  // SSE2/AVX2 logical shifts always return a vector of 0s
20348  // if the shift amount is bigger than or equal to
20349  // the element size. The constant shift amount will be
20350  // encoded as a 8-bit immediate.
20351  if (ShiftAmt.trunc(8).uge(MaxAmount))
20352  return getZeroVector(VT, Subtarget, DAG, DL);
20353  }
20354 
20355  return SDValue();
20356 }
static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG, SDLoc dl)
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
EVT getVectorElementType() const
Definition: ValueTypes.h:217
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
APInt LLVM_ATTRIBUTE_UNUSED_RESULT trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:920
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1112
Class for arbitrary precision integers.
Definition: APInt.h:75
bool hasInt256() const
Definition: X86Subtarget.h:321
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
static SDValue PerformSHLCombine ( SDNode N,
SelectionDAG DAG 
)
static

Definition at line 20286 of file X86ISelLowering.cpp.

20286  {
20287  SDValue N0 = N->getOperand(0);
20288  SDValue N1 = N->getOperand(1);
20289  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
20290  EVT VT = N0.getValueType();
20291 
20292  // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
20293  // since the result of setcc_c is all zero's or all ones.
20294  if (VT.isInteger() && !VT.isVector() &&
20295  N1C && N0.getOpcode() == ISD::AND &&
20296  N0.getOperand(1).getOpcode() == ISD::Constant) {
20297  SDValue N00 = N0.getOperand(0);
20298  if (N00.getOpcode() == X86ISD::SETCC_CARRY ||
20299  ((N00.getOpcode() == ISD::ANY_EXTEND ||
20300  N00.getOpcode() == ISD::ZERO_EXTEND) &&
20301  N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) {
20302  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
20303  APInt ShAmt = N1C->getAPIntValue();
20304  Mask = Mask.shl(ShAmt);
20305  if (Mask != 0)
20306  return DAG.getNode(ISD::AND, SDLoc(N), VT,
20307  N00, DAG.getConstant(Mask, VT));
20308  }
20309  }
20310 
20311  // Hardware support for vector shifts is sparse which makes us scalarize the
20312  // vector operations in many cases. Also, on sandybridge ADD is faster than
20313  // shl.
20314  // (shl V, 1) -> add V,V
20315  if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
20316  if (auto *N1SplatC = N1BV->getConstantSplatNode()) {
20317  assert(N0.getValueType().isVector() && "Invalid vector shift type");
20318  // We shift all of the values by one. In many cases we do not have
20319  // hardware support for this operation. This is better expressed as an ADD
20320  // of two values.
20321  if (N1SplatC->getZExtValue() == 1)
20322  return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
20323  }
20324 
20325  return SDValue();
20326 }
const SDValue & getOperand(unsigned Num) const
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:116
const APInt & getAPIntValue() const
APInt LLVM_ATTRIBUTE_UNUSED_RESULT shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:852
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
Class for arbitrary precision integers.
Definition: APInt.h:75
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:362
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:265
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:365
EVT getValueType() const
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static SDValue PerformShuffleCombine ( SDNode N,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI,
const X86Subtarget Subtarget 
)
static

PerformShuffleCombine - Performs several different shuffle combines.

Definition at line 18792 of file X86ISelLowering.cpp.

18794  {
18795  SDLoc dl(N);
18796  SDValue N0 = N->getOperand(0);
18797  SDValue N1 = N->getOperand(1);
18798  EVT VT = N->getValueType(0);
18799 
18800  // Canonicalize shuffles that perform 'addsub' on packed float vectors
18801  // according to the rule:
18802  // (shuffle (FADD A, B), (FSUB A, B), Mask) ->
18803  // (shuffle (FSUB A, -B), (FADD A, -B), Mask)
18804  //
18805  // Where 'Mask' is:
18806  // <0,5,2,7> -- for v4f32 and v4f64 shuffles;
18807  // <0,3> -- for v2f64 shuffles;
18808  // <0,9,2,11,4,13,6,15> -- for v8f32 shuffles.
18809  //
18810  // This helps pattern-matching more SSE3/AVX ADDSUB instructions
18811  // during ISel stage.
18812  if (N->getOpcode() == ISD::VECTOR_SHUFFLE &&
18813  ((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
18814  (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
18815  N0->getOpcode() == ISD::FADD && N1->getOpcode() == ISD::FSUB &&
18816  // Operands to the FADD and FSUB must be the same.
18817  ((N0->getOperand(0) == N1->getOperand(0) &&
18818  N0->getOperand(1) == N1->getOperand(1)) ||
18819  // FADD is commutable. See if by commuting the operands of the FADD
18820  // we would still be able to match the operands of the FSUB dag node.
18821  (N0->getOperand(1) == N1->getOperand(0) &&
18822  N0->getOperand(0) == N1->getOperand(1))) &&
18823  N0->getOperand(0)->getOpcode() != ISD::UNDEF &&
18824  N0->getOperand(1)->getOpcode() != ISD::UNDEF) {
18825 
18826  ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
18827  unsigned NumElts = VT.getVectorNumElements();
18828  ArrayRef<int> Mask = SV->getMask();
18829  bool CanFold = true;
18830 
18831  for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i)
18832  CanFold = Mask[i] == (int)((i & 1) ? i + NumElts : i);
18833 
18834  if (CanFold) {
18835  SDValue Op0 = N1->getOperand(0);
18836  SDValue Op1 = DAG.getNode(ISD::FNEG, dl, VT, N1->getOperand(1));
18837  SDValue Sub = DAG.getNode(ISD::FSUB, dl, VT, Op0, Op1);
18838  SDValue Add = DAG.getNode(ISD::FADD, dl, VT, Op0, Op1);
18839  return DAG.getVectorShuffle(VT, dl, Sub, Add, Mask);
18840  }
18841  }
18842 
18843  // Don't create instructions with illegal types after legalize types has run.
18844  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18845  if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
18846  return SDValue();
18847 
18848  // Combine 256-bit vector shuffles. This is only profitable when in AVX mode
18849  if (Subtarget->hasFp256() && VT.is256BitVector() &&
18850  N->getOpcode() == ISD::VECTOR_SHUFFLE)
18851  return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
18852 
18853  // During Type Legalization, when promoting illegal vector types,
18854  // the backend might introduce new shuffle dag nodes and bitcasts.
18855  //
18856  // This code performs the following transformation:
18857  // fold: (shuffle (bitcast (BINOP A, B)), Undef, <Mask>) ->
18858  // (shuffle (BINOP (bitcast A), (bitcast B)), Undef, <Mask>)
18859  //
18860  // We do this only if both the bitcast and the BINOP dag nodes have
18861  // one use. Also, perform this transformation only if the new binary
18862  // operation is legal. This is to avoid introducing dag nodes that
18863  // potentially need to be further expanded (or custom lowered) into a
18864  // less optimal sequence of dag nodes.
18865  if (!DCI.isBeforeLegalize() && DCI.isBeforeLegalizeOps() &&
18866  N1.getOpcode() == ISD::UNDEF && N0.hasOneUse() &&
18867  N0.getOpcode() == ISD::BITCAST) {
18868  SDValue BC0 = N0.getOperand(0);
18869  EVT SVT = BC0.getValueType();
18870  unsigned Opcode = BC0.getOpcode();
18871  unsigned NumElts = VT.getVectorNumElements();
18872 
18873  if (BC0.hasOneUse() && SVT.isVector() &&
18874  SVT.getVectorNumElements() * 2 == NumElts &&
18875  TLI.isOperationLegal(Opcode, VT)) {
18876  bool CanFold = false;
18877  switch (Opcode) {
18878  default : break;
18879  case ISD::ADD :
18880  case ISD::FADD :
18881  case ISD::SUB :
18882  case ISD::FSUB :
18883  case ISD::MUL :
18884  case ISD::FMUL :
18885  CanFold = true;
18886  }
18887 
18888  unsigned SVTNumElts = SVT.getVectorNumElements();
18889  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
18890  for (unsigned i = 0, e = SVTNumElts; i != e && CanFold; ++i)
18891  CanFold = SVOp->getMaskElt(i) == (int)(i * 2);
18892  for (unsigned i = SVTNumElts, e = NumElts; i != e && CanFold; ++i)
18893  CanFold = SVOp->getMaskElt(i) < 0;
18894 
18895  if (CanFold) {
18896  SDValue BC00 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(0));
18897  SDValue BC01 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(1));
18898  SDValue NewBinOp = DAG.getNode(BC0.getOpcode(), dl, VT, BC00, BC01);
18899  return DAG.getVectorShuffle(VT, dl, NewBinOp, N1, &SVOp->getMask()[0]);
18900  }
18901  }
18902  }
18903 
18904  // Only handle 128 wide vector from here on.
18905  if (!VT.is128BitVector())
18906  return SDValue();
18907 
18908  // Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
18909  // load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are
18910  // consecutive, non-overlapping, and in the right order.
18911  SmallVector<SDValue, 16> Elts;
18912  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
18913  Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
18914 
18915  SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true);
18916  if (LD.getNode())
18917  return LD;
18918 
18919  if (isTargetShuffle(N->getOpcode())) {
18920  SDValue Shuffle =
18921  PerformTargetShuffleCombine(SDValue(N, 0), DAG, DCI, Subtarget);
18922  if (Shuffle.getNode())
18923  return Shuffle;
18924  }
18925 
18926  return SDValue();
18927 }
bool hasSSE3() const
Definition: X86Subtarget.h:313
bool hasOneUse() const
static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, unsigned Depth)
unsigned getOpcode() const
const SDValue & getOperand(unsigned Num) const
static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
Try to combine x86 target specific shuffles.
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:116
EVT getValueType(unsigned ResNo) const
static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl< SDValue > &Elts, SDLoc &DL, SelectionDAG &DAG, bool isAfterLegalize)
int getMaskElt(unsigned Idx) const
EVT getVectorElementType() const
Definition: ValueTypes.h:217
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
static bool isTargetShuffle(unsigned Opcode)
SDNode * getNode() const
get the SDNode which holds the desired result
bool isTypeLegal(EVT VT) const
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:141
const SDValue & getOperand(unsigned i) const
Simple binary floating point operators.
Definition: ISDOpcodes.h:227
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned getOpcode() const
bool hasFp256() const
Definition: X86Subtarget.h:320
ArrayRef< int > getMask() const
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
void Shuffle(internal::Random *random, std::vector< E > *v)
static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
#define N
EVT getValueType() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:136
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
bool hasAVX() const
Definition: X86Subtarget.h:317
unsigned getVectorNumElements() const
Definition: ValueTypes.h:226
static SDValue PerformShuffleCombine256 ( SDNode N,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI,
const X86Subtarget Subtarget 
)
static

PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.

Definition at line 18370 of file X86ISelLowering.cpp.

18372  {
18373  SDLoc dl(N);
18374  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
18375  SDValue V1 = SVOp->getOperand(0);
18376  SDValue V2 = SVOp->getOperand(1);
18377  EVT VT = SVOp->getValueType(0);
18378  unsigned NumElems = VT.getVectorNumElements();
18379 
18380  if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
18381  V2.getOpcode() == ISD::CONCAT_VECTORS) {
18382  //
18383  // 0,0,0,...
18384  // |
18385  // V UNDEF BUILD_VECTOR UNDEF
18386  // \ / \ /
18387  // CONCAT_VECTOR CONCAT_VECTOR
18388  // \ /
18389  // \ /
18390  // RESULT: V + zero extended
18391  //
18392  if (V2.getOperand(0).getOpcode() != ISD::BUILD_VECTOR ||
18393  V2.getOperand(1).getOpcode() != ISD::UNDEF ||
18394  V1.getOperand(1).getOpcode() != ISD::UNDEF)
18395  return SDValue();
18396 
18397  if (!ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()))
18398  return SDValue();
18399 
18400  // To match the shuffle mask, the first half of the mask should
18401  // be exactly the first vector, and all the rest a splat with the
18402  // first element of the second one.
18403  for (unsigned i = 0; i != NumElems/2; ++i)
18404  if (!isUndefOrEqual(SVOp->getMaskElt(i), i) ||
18405  !isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems))
18406  return SDValue();
18407 
18408  // If V1 is coming from a vector load then just fold to a VZEXT_LOAD.
18409  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(V1.getOperand(0))) {
18410  if (Ld->hasNUsesOfValue(1, 0)) {
18411  SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other);
18412  SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() };
18413  SDValue ResNode =
18414  DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
18415  Ld->getMemoryVT(),
18416  Ld->getPointerInfo(),
18417  Ld->getAlignment(),
18418  false/*isVolatile*/, true/*ReadMem*/,
18419  false/*WriteMem*/);
18420 
18421  // Make sure the newly-created LOAD is in the same position as Ld in
18422  // terms of dependency. We create a TokenFactor for Ld and ResNode,
18423  // and update uses of Ld's output chain to use the TokenFactor.
18424  if (Ld->hasAnyUseOfValue(1)) {
18425  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18426  SDValue(Ld, 1), SDValue(ResNode.getNode(), 1));
18427  DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain);
18428  DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1),
18429  SDValue(ResNode.getNode(), 1));
18430  }
18431 
18432  return DAG.getNode(ISD::BITCAST, dl, VT, ResNode);
18433  }
18434  }
18435 
18436  // Emit a zeroed vector and insert the desired subvector on its
18437  // first half.
18438  SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
18439  SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0), 0, DAG, dl);
18440  return DCI.CombineTo(N, InsV);
18441  }
18442 
18443  //===--------------------------------------------------------------------===//
18444  // Combine some shuffles into subvector extracts and inserts:
18445  //
18446 
18447  // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
18448  if (isShuffleHigh128VectorInsertLow(SVOp)) {
18449  SDValue V = Extract128BitVector(V1, NumElems/2, DAG, dl);
18450  SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, 0, DAG, dl);
18451  return DCI.CombineTo(N, InsV);
18452  }
18453 
18454  // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
18455  if (isShuffleLow128VectorInsertHigh(SVOp)) {
18456  SDValue V = Extract128BitVector(V1, 0, DAG, dl);
18457  SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, NumElems/2, DAG, dl);
18458  return DCI.CombineTo(N, InsV);
18459  }
18460 
18461  return SDValue();
18462 }
static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG, SDLoc dl)
static bool isUndefOrEqual(int Val, int CmpVal)
const SDValue & getOperand(unsigned Num) const
static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
bool isBuildVectorAllZeros(const SDNode *N)
EVT getValueType(unsigned ResNo) const
int getMaskElt(unsigned Idx) const
SDVTList getVTList(EVT VT)
SDValue CombineTo(SDNode *N, const std::vector< SDValue > &To, bool AddTo=true)
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
SDNode * getNode() const
get the SDNode which holds the desired result
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp)
#define N
SDValue getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, bool Vol=false, bool ReadMem=true, bool WriteMem=true)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
unsigned getVectorNumElements() const
Definition: ValueTypes.h:226
static SDValue PerformSIGN_EXTEND_INREGCombine ( SDNode N,
SelectionDAG DAG,
const X86Subtarget Subtarget 
)
static

Definition at line 21541 of file X86ISelLowering.cpp.

21542  {
21543  EVT VT = N->getValueType(0);
21544  if (!VT.isVector())
21545  return SDValue();
21546 
21547  SDValue N0 = N->getOperand(0);
21548  SDValue N1 = N->getOperand(1);
21549  EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
21550  SDLoc dl(N);
21551 
21552  // The SIGN_EXTEND_INREG to v4i64 is expensive operation on the
21553  // both SSE and AVX2 since there is no sign-extended shift right
21554  // operation on a vector with 64-bit elements.
21555  //(sext_in_reg (v4i64 anyext (v4i32 x )), ExtraVT) ->
21556  // (v4i64 sext (v4i32 sext_in_reg (v4i32 x , ExtraVT)))
21557  if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND ||
21558  N0.getOpcode() == ISD::SIGN_EXTEND)) {
21559  SDValue N00 = N0.getOperand(0);
21560 
21561  // EXTLOAD has a better solution on AVX2,
21562  // it may be replaced with X86ISD::VSEXT node.
21563  if (N00.getOpcode() == ISD::LOAD && Subtarget->hasInt256())
21564  if (!ISD::isNormalLoad(N00.getNode()))
21565  return SDValue();
21566 
21567  if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) {
21568  SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32,
21569  N00, N1);
21570  return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp);
21571  }
21572  }
21573  return SDValue();
21574 }
const SDValue & getOperand(unsigned Num) const
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:116
EVT getValueType(unsigned ResNo) const
SDNode * getNode() const
get the SDNode which holds the desired result
bool isNormalLoad(const SDNode *N)
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
bool hasInt256() const
Definition: X86Subtarget.h:321
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:365
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
EVT getValueType() const
static SDValue PerformSINT_TO_FPCombine ( SDNode N,
SelectionDAG DAG,
const X86TargetLowering XTLI 
)
static

Definition at line 21893 of file X86ISelLowering.cpp.

21894  {
21895  // First try to optimize away the conversion entirely when it's
21896  // conditionally from a constant. Vectors only.
21897  SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
21898  if (Res != SDValue())
21899  return Res;
21900 
21901  // Now move on to more general possibilities.
21902  SDValue Op0 = N->getOperand(0);
21903  EVT InVT = Op0->getValueType(0);
21904 
21905  // SINT_TO_FP(v4i8) -> SINT_TO_FP(SEXT(v4i8 to v4i32))
21906  if (InVT == MVT::v8i8 || InVT == MVT::v4i8) {
21907  SDLoc dl(N);
21908  MVT DstVT = InVT == MVT::v4i8 ? MVT::v4i32 : MVT::v8i32;
21909  SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
21910  return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P);
21911  }
21912 
21913  // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
21914  // a 32-bit target where SSE doesn't support i64->FP operations.
21915  if (Op0.getOpcode() == ISD::LOAD) {
21916  LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
21917  EVT VT = Ld->getValueType(0);
21918  if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
21919  ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
21920  !XTLI->getSubtarget()->is64Bit() &&
21921  VT == MVT::i64) {
21922  SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0),
21923  Ld->getChain(), Op0, DAG);
21924  DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
21925  return FILDChain;
21926  }
21927  }
21928  return SDValue();
21929 }
SDValue getValue(unsigned R) const
bool hasOneUse() const
const X86Subtarget * getSubtarget() const
const SDValue & getOperand(unsigned Num) const
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:116
EVT getValueType(unsigned ResNo) const
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Definition: X86Subtarget.h:283
SDNode * getNode() const
get the SDNode which holds the desired result
#define P(N)
unsigned getOpcode() const
bool isVolatile() const
SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, SelectionDAG &DAG) const
const SDValue & getChain() const
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
bool isNON_EXTLoad(const SDNode *N)
static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N, SelectionDAG &DAG)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
static SDValue PerformSTORECombine ( SDNode N,
SelectionDAG DAG,
const X86Subtarget Subtarget 
)
static

PerformSTORECombine - Do target-specific dag combines on STORE nodes.

Definition at line 21061 of file X86ISelLowering.cpp.

21062  {
21063  StoreSDNode *St = cast<StoreSDNode>(N);
21064  EVT VT = St->getValue().getValueType();
21065  EVT StVT = St->getMemoryVT();
21066  SDLoc dl(St);
21067  SDValue StoredVal = St->getOperand(1);
21068  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21069 
21070  // If we are saving a concatenation of two XMM registers, perform two stores.
21071  // On Sandy Bridge, 256-bit memory operations are executed by two
21072  // 128-bit ports. However, on Haswell it is better to issue a single 256-bit
21073  // memory operation.
21074  unsigned Alignment = St->getAlignment();
21075  bool IsAligned = Alignment == 0 || Alignment >= VT.getSizeInBits()/8;
21076  if (VT.is256BitVector() && !Subtarget->hasInt256() &&
21077  StVT == VT && !IsAligned) {
21078  unsigned NumElems = VT.getVectorNumElements();
21079  if (NumElems < 2)
21080  return SDValue();
21081 
21082  SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl);
21083  SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl);
21084 
21085  SDValue Stride = DAG.getConstant(16, TLI.getPointerTy());
21086  SDValue Ptr0 = St->getBasePtr();
21087  SDValue Ptr1 = DAG.getNode(ISD::ADD, dl, Ptr0.getValueType(), Ptr0, Stride);
21088 
21089  SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0,
21090  St->getPointerInfo(), St->isVolatile(),
21091  St->isNonTemporal(), Alignment);
21092  SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1,
21093  St->getPointerInfo(), St->isVolatile(),
21094  St->isNonTemporal(),
21095  std::min(16U, Alignment));
21096  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
21097  }
21098 
21099  // Optimize trunc store (of multiple scalars) to shuffle and store.
21100  // First, pack all of the elements in one place. Next, store to memory
21101  // in fewer chunks.
21102  if (St->isTruncatingStore() && VT.isVector()) {
21103  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21104  unsigned NumElems = VT.getVectorNumElements();
21105  assert(StVT != VT && "Cannot truncate to the same type");
21106  unsigned FromSz = VT.getVectorElementType().getSizeInBits();
21107  unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
21108 
21109  // From, To sizes and ElemCount must be pow of two
21110  if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue();
21111  // We are going to use the original vector elt for storing.
21112  // Accumulated smaller vector elements must be a multiple of the store size.
21113  if (0 != (NumElems * FromSz) % ToSz) return SDValue();
21114 
21115  unsigned SizeRatio = FromSz / ToSz;
21116 
21117  assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
21118 
21119  // Create a type on which we perform the shuffle
21120  EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
21121  StVT.getScalarType(), NumElems*SizeRatio);
21122 
21123  assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
21124 
21125  SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, St->getValue());
21126  SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
21127  for (unsigned i = 0; i != NumElems; ++i)
21128  ShuffleVec[i] = i * SizeRatio;
21129 
21130  // Can't shuffle using an illegal type.
21131  if (!TLI.isTypeLegal(WideVecVT))
21132  return SDValue();
21133 
21134  SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
21135  DAG.getUNDEF(WideVecVT),
21136  &ShuffleVec[0]);
21137  // At this point all of the data is stored at the bottom of the
21138  // register. We now need to save it to mem.
21139 
21140  // Find the largest store unit
21141  MVT StoreType = MVT::i8;
21142  for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
21143  tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
21144  MVT Tp = (MVT::SimpleValueType)tp;
21145  if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToSz)
21146  StoreType = Tp;
21147  }
21148 
21149  // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64.
21150  if (TLI.isTypeLegal(MVT::f64) && StoreType.getSizeInBits() < 64 &&
21151  (64 <= NumElems * ToSz))
21152  StoreType = MVT::f64;
21153 
21154  // Bitcast the original vector into a vector of store-size units
21155  EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
21156  StoreType, VT.getSizeInBits()/StoreType.getSizeInBits());
21157  assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
21158  SDValue ShuffWide = DAG.getNode(ISD::BITCAST, dl, StoreVecVT, Shuff);
21159  SmallVector<SDValue, 8> Chains;
21160  SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
21161  TLI.getPointerTy());
21162  SDValue Ptr = St->getBasePtr();
21163 
21164  // Perform one or more big stores into memory.
21165  for (unsigned i=0, e=(ToSz*NumElems)/StoreType.getSizeInBits(); i!=e; ++i) {
21166  SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
21167  StoreType, ShuffWide,
21168  DAG.getIntPtrConstant(i));
21169  SDValue Ch = DAG.getStore(St->getChain(), dl, SubVec, Ptr,
21170  St->getPointerInfo(), St->isVolatile(),
21171  St->isNonTemporal(), St->getAlignment());
21172  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
21173  Chains.push_back(Ch);
21174  }
21175 
21176  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
21177  }
21178 
21179  // Turn load->store of MMX types into GPR load/stores. This avoids clobbering
21180  // the FP state in cases where an emms may be missing.
21181  // A preferable solution to the general problem is to figure out the right
21182  // places to insert EMMS. This qualifies as a quick hack.
21183 
21184  // Similarly, turn load->store of i64 into double load/stores in 32-bit mode.
21185  if (VT.getSizeInBits() != 64)
21186  return SDValue();
21187 
21188  const Function *F = DAG.getMachineFunction().getFunction();
21189  bool NoImplicitFloatOps = F->getAttributes().
21190  hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
21191  bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
21192  && Subtarget->hasSSE2();
21193  if ((VT.isVector() ||
21194  (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
21195  isa<LoadSDNode>(St->getValue()) &&
21196  !cast<LoadSDNode>(St->getValue())->isVolatile() &&
21197  St->getChain().hasOneUse() && !St->isVolatile()) {
21198  SDNode* LdVal = St->getValue().getNode();
21199  LoadSDNode *Ld = nullptr;
21200  int TokenFactorIndex = -1;
21201  SmallVector<SDValue, 8> Ops;
21202  SDNode* ChainVal = St->getChain().getNode();
21203  // Must be a store of a load. We currently handle two cases: the load
21204  // is a direct child, and it's under an intervening TokenFactor. It is
21205  // possible to dig deeper under nested TokenFactors.
21206  if (ChainVal == LdVal)
21207  Ld = cast<LoadSDNode>(St->getChain());
21208  else if (St->getValue().hasOneUse() &&
21209  ChainVal->getOpcode() == ISD::TokenFactor) {
21210  for (unsigned i = 0, e = ChainVal->getNumOperands(); i != e; ++i) {
21211  if (ChainVal->getOperand(i).getNode() == LdVal) {
21212  TokenFactorIndex = i;
21213  Ld = cast<LoadSDNode>(St->getValue());
21214  } else
21215  Ops.push_back(ChainVal->getOperand(i));
21216  }
21217  }
21218 
21219  if (!Ld || !ISD::isNormalLoad(Ld))
21220  return SDValue();
21221 
21222  // If this is not the MMX case, i.e. we are just turning i64 load/store
21223  // into f64 load/store, avoid the transformation if there are multiple
21224  // uses of the loaded value.
21225  if (!VT.isVector() && !Ld->hasNUsesOfValue(1, 0))
21226  return SDValue();
21227 
21228  SDLoc LdDL(Ld);
21229  SDLoc StDL(N);
21230  // If we are a 64-bit capable x86, lower to a single movq load/store pair.
21231  // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
21232  // pair instead.
21233  if (Subtarget->is64Bit() || F64IsLegal) {
21234  EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
21235  SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
21236  Ld->getPointerInfo(), Ld->isVolatile(),
21237  Ld->isNonTemporal(), Ld->isInvariant(),
21238  Ld->getAlignment());
21239  SDValue NewChain = NewLd.getValue(1);
21240  if (TokenFactorIndex != -1) {
21241  Ops.push_back(NewChain);
21242  NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
21243  }
21244  return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
21245  St->getPointerInfo(),
21246  St->isVolatile(), St->isNonTemporal(),
21247  St->getAlignment());
21248  }
21249 
21250  // Otherwise, lower to two pairs of 32-bit loads / stores.
21251  SDValue LoAddr = Ld->getBasePtr();
21252  SDValue HiAddr = DAG.getNode(ISD::ADD, LdDL, MVT::i32, LoAddr,
21253  DAG.getConstant(4, MVT::i32));
21254 
21255  SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
21256  Ld->getPointerInfo(),
21257  Ld->isVolatile(), Ld->isNonTemporal(),
21258  Ld->isInvariant(), Ld->getAlignment());
21259  SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
21260  Ld->getPointerInfo().getWithOffset(4),
21261  Ld->isVolatile(), Ld->isNonTemporal(),
21262  Ld->isInvariant(),
21263  MinAlign(Ld->getAlignment(), 4));
21264 
21265  SDValue NewChain = LoLd.getValue(1);
21266  if (TokenFactorIndex != -1) {
21267  Ops.push_back(LoLd);
21268  Ops.push_back(HiLd);
21269  NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
21270  }
21271 
21272  LoAddr = St->getBasePtr();
21273  HiAddr = DAG.getNode(ISD::ADD, StDL, MVT::i32, LoAddr,
21274  DAG.getConstant(4, MVT::i32));
21275 
21276  SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr,
21277  St->getPointerInfo(),
21278  St->isVolatile(), St->isNonTemporal(),
21279  St->getAlignment());
21280  SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr,
21281  St->getPointerInfo().getWithOffset(4),
21282  St->isVolatile(),
21283  St->isNonTemporal(),
21284  MinAlign(St->getAlignment(), 4));
21285  return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
21286  }
21287  return SDValue();
21288 }
SDValue getValue(unsigned R) const
LLVMContext * getContext() const
Definition: SelectionDAG.h:280
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
bool hasOneUse() const
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:277
unsigned getOpcode() const
unsigned getSizeInBits() const
unsigned getNumOperands() const
const SDValue & getOperand(unsigned Num) const
F(f)
const Function * getFunction() const
const SDValue & getBasePtr() const
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:276
MachinePointerInfo getWithOffset(int64_t O) const
EVT getScalarType() const
Definition: ValueTypes.h:211
virtual MVT getPointerTy(uint32_t=0) const
EVT getVectorElementType() const
Definition: ValueTypes.h:217
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Definition: X86Subtarget.h:283
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
const SDValue & getBasePtr() const
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
EVT getMemoryVT() const
getMemoryVT - Return the type of the in-memory value.
bool hasSSE2() const
Definition: X86Subtarget.h:312
SDNode * getNode() const
get the SDNode which holds the desired result
bool isTypeLegal(EVT VT) const
bool isNormalLoad(const SDNode *N)
assert(Globals.size() > 1)
bool isNonTemporal() const
bool isVolatile() const
const SDValue & getValue() const
SDValue getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo=nullptr)
const MachinePointerInfo & getPointerInfo() const
bool isInvariant() const
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
SDValue getIntPtrConstant(uint64_t Val, bool isTarget=false)
const SDValue & getChain() const
AttributeSet getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:170
bool hasInt256() const
Definition: X86Subtarget.h:321
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo=nullptr, const MDNode *Ranges=nullptr)
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
uint64_t MinAlign(uint64_t A, uint64_t B)
Definition: MathExtras.h:544
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
#define N
EVT getValueType() const
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
bool isTruncatingStore() const
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
bool isPowerOf2_32(uint32_t Value)
Definition: MathExtras.h:363
unsigned getAlignment() const
static SDValue PerformSubCombine ( SDNode N,
SelectionDAG DAG,
const X86Subtarget Subtarget 
)
static

Definition at line 22012 of file X86ISelLowering.cpp.

// DAG combine for ISD::SUB on x86. Tries three rewrites in order:
// constant-LHS folding, horizontal-sub synthesis, and conditional-decrement
// optimization. Returns an empty SDValue when nothing applies.
 22013  {
 22014  SDValue Op0 = N->getOperand(0);
 22015  SDValue Op1 = N->getOperand(1);
 22016 
 22017  // X86 can't encode an immediate LHS of a sub. See if we can push the
 22018  // negation into a preceding instruction.
 22019  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op0)) {
 22020  // If the RHS of the sub is a XOR with one use and a constant, invert the
 22021  // immediate. Then add one to the LHS of the sub so we can turn
 22022  // X-Y -> X+~Y+1, saving one register.
 22023  if (Op1->hasOneUse() && Op1.getOpcode() == ISD::XOR &&
 22024  isa<ConstantSDNode>(Op1.getOperand(1))) {
 22025  APInt XorC = cast<ConstantSDNode>(Op1.getOperand(1))->getAPIntValue();
 22026  EVT VT = Op0.getValueType();
// Build (xor y, ~imm), then add the original constant plus one — the
// two's-complement identity C - (y ^ imm) == (y ^ ~imm) + (C + 1).
 22027  SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT,
 22028  Op1.getOperand(0),
 22029  DAG.getConstant(~XorC, VT));
 22030  return DAG.getNode(ISD::ADD, SDLoc(N), VT, NewXor,
 22031  DAG.getConstant(C->getAPIntValue()+1, VT));
 22032  }
 22033  }
 22034 
 22035  // Try to synthesize horizontal subs from subs of shuffles (X86ISD::HSUB).
 22036  EVT VT = N->getValueType(0);
// PHSUBW/PHSUBD need SSSE3 for 128-bit types, AVX2 (Int256) for 256-bit.
 22037  if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
 22038  (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
 22039  isHorizontalBinOp(Op0, Op1, true))
 22040  return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1);
 22041 
// Finally, defer to the conditional in/decrement optimization.
 22042  return OptimizeConditionalInDecrement(N, DAG);
 22043 }
bool hasOneUse() const
const SDValue & getOperand(unsigned Num) const
HSUB - Integer horizontal sub.
static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG)
EVT getValueType(unsigned ResNo) const
bool hasSSSE3() const
Definition: X86Subtarget.h:314
const SDValue & getOperand(unsigned i) const
static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, SelectionDAG &DAG, unsigned BaseIdx, unsigned LastIdx, SDValue &V0, SDValue &V1)
Return true if N implements a horizontal binop and return the operands for the horizontal binop into ...
unsigned getOpcode() const
Class for arbitrary precision integers.
Definition: APInt.h:75
bool hasInt256() const
Definition: X86Subtarget.h:321
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
EVT getValueType() const
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static SDValue PerformTargetShuffleCombine ( SDValue  N,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI,
const X86Subtarget Subtarget 
)
static

Try to combine x86 target specific shuffles.

Definition at line 18687 of file X86ISelLowering.cpp.

// Try to simplify x86 target-specific shuffle nodes
// (PSHUFD / PSHUFLW / PSHUFHW), folding chains of them into fewer or
// cheaper shuffles. Returns an empty SDValue when no combine fires.
 18689  {
 18690  SDLoc DL(N);
 18691  MVT VT = N.getSimpleValueType();
 18692  SmallVector<int, 4> Mask;
 18693 
// Only the three PSHUF variants are handled; everything else bails out.
 18694  switch (N.getOpcode()) {
 18695  case X86ISD::PSHUFD:
 18696  case X86ISD::PSHUFLW:
 18697  case X86ISD::PSHUFHW:
 18698  Mask = getPSHUFShuffleMask(N);
 18699  assert(Mask.size() == 4);
 18700  break;
 18701  default:
 18702  return SDValue();
 18703  }
 18704 
 18705  // Nuke no-op shuffles that show up after combining.
 18706  if (isNoopShuffleMask(Mask))
 18707  return DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo*/ true);
 18708 
 18709  // Look for simplifications involving one or two shuffle instructions.
 18710  SDValue V = N.getOperand(0);
 18711  switch (N.getOpcode()) {
 18712  default:
 18713  break;
 18714  case X86ISD::PSHUFLW:
 18715  case X86ISD::PSHUFHW:
 18716  assert(VT == MVT::v8i16);
 18717  (void)VT;
 18718 
 18719  if (combineRedundantHalfShuffle(N, Mask, DAG, DCI))
 18720  return SDValue(); // We combined away this shuffle, so we're done.
 18721 
 18722  // See if this reduces to a PSHUFD which is no more expensive and can
 18723  // combine with more operations.
 18724  if (Mask[0] % 2 == 0 && Mask[2] % 2 == 0 &&
// NOTE(review): the tail of this condition (original source line 18725)
// is omitted from this excerpt — verify against the full source.
 18726  int DMask[] = {-1, -1, -1, -1};
 18727  int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
// Collapse adjacent word pairs into dword lanes so the half-shuffle can
// be expressed as a v4i32 PSHUFD.
 18728  DMask[DOffset + 0] = DOffset + Mask[0] / 2;
 18729  DMask[DOffset + 1] = DOffset + Mask[2] / 2;
 18730  V = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V);
 18731  DCI.AddToWorklist(V.getNode());
 18732  V = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V,
 18733  getV4X86ShuffleImm8ForMask(DMask, DAG));
 18734  DCI.AddToWorklist(V.getNode());
 18735  return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V);
 18736  }
 18737 
 18738  // Look for shuffle patterns which can be implemented as a single unpack.
 18739  // FIXME: This doesn't handle the location of the PSHUFD generically, and
 18740  // only works when we have a PSHUFD followed by two half-shuffles.
 18741  if (Mask[0] == Mask[1] && Mask[2] == Mask[3] &&
 18742  (V.getOpcode() == X86ISD::PSHUFLW ||
 18743  V.getOpcode() == X86ISD::PSHUFHW) &&
 18744  V.getOpcode() != N.getOpcode() &&
 18745  V.hasOneUse()) {
// Peel bitcasts to find the PSHUFD feeding the two half-shuffles.
 18746  SDValue D = V.getOperand(0);
 18747  while (D.getOpcode() == ISD::BITCAST && D.hasOneUse())
 18748  D = D.getOperand(0);
 18749  if (D.getOpcode() == X86ISD::PSHUFD && D.hasOneUse()) {
// NOTE(review): original source lines 18750-18751 are omitted from this
// excerpt; they presumably define VMask and DMask (the shuffle masks of
// V and D used below) — verify against the full source.
 18752  int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
 18753  int VOffset = V.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
 18754  int WordMask[8];
// Merge the two half-shuffle masks into one 8-element word mask.
 18755  for (int i = 0; i < 4; ++i) {
 18756  WordMask[i + NOffset] = Mask[i] + NOffset;
 18757  WordMask[i + VOffset] = VMask[i] + VOffset;
 18758  }
 18759  // Map the word mask through the DWord mask.
 18760  int MappedMask[8];
 18761  for (int i = 0; i < 8; ++i)
 18762  MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2;
 18763  const int UnpackLoMask[] = {0, 0, 1, 1, 2, 2, 3, 3};
 18764  const int UnpackHiMask[] = {4, 4, 5, 5, 6, 6, 7, 7};
 18765  if (std::equal(std::begin(MappedMask), std::end(MappedMask),
 18766  std::begin(UnpackLoMask)) ||
 18767  std::equal(std::begin(MappedMask), std::end(MappedMask),
 18768  std::begin(UnpackHiMask))) {
 18769  // We can replace all three shuffles with an unpack.
 18770  V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, D.getOperand(0));
 18771  DCI.AddToWorklist(V.getNode());
 18772  return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL
 18773  : X86ISD::UNPCKH,
 18774  DL, MVT::v8i16, V, V);
 18775  }
 18776  }
 18777  }
 18778 
 18779  break;
 18780 
 18781  case X86ISD::PSHUFD:
 18782  if (combineRedundantDWordShuffle(N, Mask, DAG, DCI))
 18783  return SDValue(); // We combined away this shuffle.
 18784 
 18785  break;
 18786  }
 18787 
 18788  return SDValue();
 18789 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
const_iterator end(StringRef path)
Get end iterator over path.
static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef< int > Mask, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
Search for a combinable shuffle across a chain ending in pshufd.
bool hasOneUse() const
const_iterator begin(StringRef path)
Get begin iterator over path.
static bool areAdjacentMasksSequential(ArrayRef< int > Mask)
Tiny helper function to test whether adjacent masks are sequential.
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
SDValue CombineTo(SDNode *N, const std::vector< SDValue > &To, bool AddTo=true)
SDNode * getNode() const
get the SDNode which holds the desired result
assert(Globals.size() > 1)
const SDValue & getOperand(unsigned i) const
static bool isNoopShuffleMask(ArrayRef< int > Mask)
Tiny helper function to identify a no-op mask.
static SDValue getV4X86ShuffleImm8ForMask(ArrayRef< int > Mask, SelectionDAG &DAG)
Get a 4-lane 8-bit shuffle immediate for a mask.
unsigned getOpcode() const
static SmallVector< int, 4 > getPSHUFShuffleMask(SDValue N)
Get the PSHUF-style mask from PSHUF node.
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef< int > Mask, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
Search for a combinable shuffle across a chain ending in pshuflw or pshufhw.
static SDValue PerformTruncateCombine ( SDNode N,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI,
const X86Subtarget Subtarget 
)
static

PerformTruncateCombine - Converts a truncate operation into a sequence of vector shuffle operations. This is possible when truncating a 256-bit vector to a 128-bit vector.

Definition at line 18932 of file X86ISelLowering.cpp.

// Intentionally a no-op: always returns an empty SDValue, leaving
// truncate lowering to the generic DAG combiner.
 18934  {
 18935  return SDValue();
 18936 }
static SDValue performVectorCompareAndMaskUnaryOpCombine ( SDNode N,
SelectionDAG DAG 
)
static

Definition at line 21847 of file X86ISelLowering.cpp.

// Fold a unary op applied to a compare-and-mask:
//   UNARYOP(AND(VECTOR_CMP(x,y), constant)) ->
//   AND(VECTOR_CMP(x,y), UNARYOP(constant))
// valid because vector compares produce all-zeros/all-ones lanes.
 21848  {
 21849  // Take advantage of vector comparisons producing 0 or -1 in each lane to
 21850  // optimize away operation when it's from a constant.
 21851  //
 21852  // The general transformation is:
 21853  // UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
 21854  // AND(VECTOR_CMP(x,y), constant2)
 21855  // constant2 = UNARYOP(constant)
 21856 
 21857  // Early exit if this isn't a vector operation, the operand of the
 21858  // unary operation isn't a bitwise AND, or if the sizes of the operations
 21859  // aren't the same.
 21860  EVT VT = N->getValueType(0);
 21861  if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
 21862  N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
 21863  VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
 21864  return SDValue();
 21865 
 21866  // Now check that the other operand of the AND is a constant splat. We could
 21867  // make the transformation for non-constant splats as well, but it's unclear
 21868  // that would be a benefit as it would not eliminate any operations, just
 21869  // perform one more step in scalar code before moving to the vector unit.
 21870  if (BuildVectorSDNode *BV =
 21871  dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
 21872  // Bail out if the vector isn't a constant splat.
 21873  if (!BV->getConstantSplatNode())
 21874  return SDValue();
 21875 
 21876  // Everything checks out. Build up the new and improved node.
 21877  SDLoc DL(N);
 21878  EVT IntVT = BV->getValueType(0);
 21879  // Create a new constant of the appropriate type for the transformed
 21880  // DAG.
// Apply the unary op to the constant at build time (constant folded).
 21881  SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
 21882  // The AND node needs bitcasts to/from an integer vector type around it.
 21883  SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
 21884  SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
 21885  N->getOperand(0)->getOperand(0), MaskConst);
 21886  SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
 21887  return Res;
 21888  }
 21889 
 21890  return SDValue();
 21891 }
unsigned getOpcode() const
const SDValue & getOperand(unsigned Num) const
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:116
EVT getValueType(unsigned ResNo) const
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
static SDValue PerformVZEXT_MOVLCombine ( SDNode N,
SelectionDAG DAG 
)
static

Definition at line 21528 of file X86ISelLowering.cpp.

// Fold a VZEXT_MOVL whose (possibly bitcast) input is a VZEXT_LOAD into a
// plain bitcast of that load, when the element sizes line up.
 21528  {
 21529  SDValue Op = N->getOperand(0);
// Look through a single bitcast to find the underlying load.
 21530  if (Op.getOpcode() == ISD::BITCAST)
 21531  Op = Op.getOperand(0);
 21532  EVT VT = N->getValueType(0), OpVT = Op.getValueType();
 21533  if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
// NOTE(review): original source line 21534 (the middle of this element
// bit-size comparison) is omitted from this excerpt — verify against the
// full source.
 21535  OpVT.getVectorElementType().getSizeInBits()) {
 21536  return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
 21537  }
 21538  return SDValue();
 21539 }
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
EVT getVectorElementType() const
Definition: ValueTypes.h:217
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
EVT getValueType() const
static SDValue performVZEXTCombine ( SDNode N,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI,
const X86Subtarget Subtarget 
)
static

performVZEXTCombine - Performs VZEXT combines, folding (vzext (bitcast (vzext x))) into (vzext x).

Definition at line 22046 of file X86ISelLowering.cpp.

// Collapse nested zero-extends: (vzext (bitcast* (vzext x))) -> (vzext x).
// Any number of intervening bitcasts is looked through.
 22048  {
 22049  // (vzext (bitcast (vzext (x)) -> (vzext x)
 22050  SDValue In = N->getOperand(0);
// Strip all bitcasts between the two vzext nodes.
 22051  while (In.getOpcode() == ISD::BITCAST)
 22052  In = In.getOperand(0);
 22053 
 22054  if (In.getOpcode() != X86ISD::VZEXT)
 22055  return SDValue();
 22056 
// Re-extend directly from the inner vzext's source in the outer type.
 22057  return DAG.getNode(X86ISD::VZEXT, SDLoc(N), N->getValueType(0),
 22058  In.getOperand(0));
 22059 }
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
static SDValue PerformXorCombine ( SDNode N,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI,
const X86Subtarget Subtarget 
)
static

Definition at line 20850 of file X86ISelLowering.cpp.

// DAG combine for ISD::XOR: after legalization, and only when CMOV is
// available, try to recognize an integer-abs pattern.
 20852  {
// Wait until after op legalization; earlier combining is left to the
// generic combiner.
 20853  if (DCI.isBeforeLegalizeOps())
 20854  return SDValue();
 20855 
 20856  if (Subtarget->hasCMov()) {
 20857  SDValue RV = performIntegerAbsCombine(N, DAG);
 20858  if (RV.getNode())
 20859  return RV;
 20860  }
 20861 
 20862  return SDValue();
 20863 }
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasCMov() const
Definition: X86Subtarget.h:309
static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformZExtCombine ( SDNode N,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI,
const X86Subtarget Subtarget 
)
static

Definition at line 21635 of file X86ISelLowering.cpp.

// DAG combine for ISD::ZERO_EXTEND: eliminate zexts of SETCC_CARRY
// results (always legalized to i8) by re-emitting the carry in the wider
// type and masking with 1; also widens 256-bit mask arithmetic.
 21637  {
 21638  // (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
 21639  // (and (i32 x86isd::setcc_carry), 1)
 21640  // This eliminates the zext. This transformation is necessary because
 21641  // ISD::SETCC is always legalized to i8.
 21642  SDLoc dl(N);
 21643  SDValue N0 = N->getOperand(0);
 21644  EVT VT = N->getValueType(0);
 21645 
 21646  if (N0.getOpcode() == ISD::AND &&
 21647  N0.hasOneUse() &&
 21648  N0.getOperand(0).hasOneUse()) {
 21649  SDValue N00 = N0.getOperand(0);
 21650  if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
// NOTE(review): original source line 21651, which defines C (presumably a
// dyn_cast<ConstantSDNode> of the AND's constant operand), is omitted
// from this excerpt — verify against the full source.
 21652  if (!C || C->getZExtValue() != 1)
 21653  return SDValue();
 21654  return DAG.getNode(ISD::AND, dl, VT,
 21655  DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
 21656  N00.getOperand(0), N00.getOperand(1)),
 21657  DAG.getConstant(1, VT));
 21658  }
 21659  }
 21660 
// Same rewrite when the carry reaches the zext through a truncate.
 21661  if (N0.getOpcode() == ISD::TRUNCATE &&
 21662  N0.hasOneUse() &&
 21663  N0.getOperand(0).hasOneUse()) {
 21664  SDValue N00 = N0.getOperand(0);
 21665  if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
 21666  return DAG.getNode(ISD::AND, dl, VT,
 21667  DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
 21668  N00.getOperand(0), N00.getOperand(1)),
 21669  DAG.getConstant(1, VT));
 21670  }
 21671  }
 21672  if (VT.is256BitVector()) {
 21673  SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
 21674  if (R.getNode())
 21675  return R;
 21676  }
 21677 
 21678  return SDValue();
 21679 }
bool hasOneUse() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
SDNode * getNode() const
get the SDNode which holds the desired result
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:141
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget)
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:265
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:368
uint64_t getZExtValue() const
static SDValue PromoteSplat ( ShuffleVectorSDNode SV,
SelectionDAG DAG 
)
static

PromoteSplat - Splat is promoted to target supported vector shuffles.

Definition at line 5048 of file X86ISelLowering.cpp.

// Promote a splat shuffle to a form the target supports: reduce to the
// 128-bit half containing the splat element, widen i8/i16 elements up to
// i32 granularity via unpacks, rebuild 256-bit vectors by concatenation,
// then emit the legal splat.
 5048  {
 5049  MVT SrcVT = SV->getSimpleValueType(0);
 5050  SDValue V1 = SV->getOperand(0);
 5051  SDLoc dl(SV);
 5052 
 5053  int EltNo = SV->getSplatIndex();
 5054  int NumElems = SrcVT.getVectorNumElements();
 5055  bool Is256BitVec = SrcVT.is256BitVector();
 5056 
 5057  assert(((SrcVT.is128BitVector() && NumElems > 4) || Is256BitVec) &&
 5058  "Unknown how to promote splat for type");
 5059 
 5060  // Extract the 128-bit part containing the splat element and update
 5061  // the splat element index when it refers to the higher register.
 5062  if (Is256BitVec) {
 5063  V1 = Extract128BitVector(V1, EltNo, DAG, dl);
 5064  if (EltNo >= NumElems/2)
 5065  EltNo -= NumElems/2;
 5066  }
 5067 
 5068  // All i16 and i8 vector types can't be used directly by a generic shuffle
 5069  // instruction because the target has no such instruction. Generate shuffles
 5070  // which repeat i16 and i8 several times until they fit in i32, and then can
 5071  // be manipulated by target supported shuffles.
 5072  MVT EltVT = SrcVT.getVectorElementType();
 5073  if (EltVT == MVT::i8 || EltVT == MVT::i16)
 5074  V1 = PromoteSplati8i16(V1, DAG, EltNo);
 5075 
 5076  // Recreate the 256-bit vector and place the same 128-bit vector
 5077  // into the low and high part. This is necessary because we want
 5078  // to use VPERM* to shuffle the vectors
 5079  if (Is256BitVec) {
 5080  V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, V1, V1);
 5081  }
 5082 
 5083  return getLegalSplat(DAG, V1, EltNo);
 5084 }
static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo)
const SDValue & getOperand(unsigned Num) const
static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo)
getLegalSplat - Generate a legal splat with supported x86 shuffles
assert(Globals.size() > 1)
unsigned getVectorNumElements() const
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, SDLoc dl)
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
MVT getVectorElementType() const
MVT getSimpleValueType(unsigned ResNo) const
static SDValue PromoteSplati8i16 ( SDValue  V,
SelectionDAG DAG,
int &  EltNo 
)
static

Definition at line 5004 of file X86ISelLowering.cpp.

// Repeatedly unpack V with itself, halving the effective element count
// each round until at most 4 elements remain, keeping EltNo pointing at
// the splat element. Unpack-low keeps the lower half's elements;
// unpack-high keeps the upper half's (so EltNo is rebased).
 5004  {
 5005  MVT VT = V.getSimpleValueType();
 5006  int NumElems = VT.getVectorNumElements();
 5007  SDLoc dl(V);
 5008 
 5009  while (NumElems > 4) {
 5010  if (EltNo < NumElems/2) {
 5011  V = getUnpackl(DAG, dl, VT, V, V);
 5012  } else {
 5013  V = getUnpackh(DAG, dl, VT, V, V);
 5014  EltNo -= NumElems/2;
 5015  }
 5016  NumElems >>= 1;
 5017  }
 5018  return V;
 5019 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, SDValue V2)
getUnpackh - Returns a vector_shuffle node for an unpackh operation.
unsigned getVectorNumElements() const
static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, SDValue V2)
getUnpackl - Returns a vector_shuffle node for an unpackl operation.
static void ReplaceATOMIC_LOAD ( SDNode Node,
SmallVectorImpl< SDValue > &  Results,
SelectionDAG DAG 
)
static

Definition at line 16295 of file X86ISelLowering.cpp.

// Lower a wide ATOMIC_LOAD by rewriting it as a compare-and-swap with
// expected == new == 0: if the value is 0 the CAS "succeeds" writing 0
// back (no visible change); otherwise it fails and still returns the
// loaded value. Pushes the value and chain results into Results.
 16297  {
 16298  SDLoc dl(Node);
 16299  EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
 16300 
 16301  // Convert wide load -> cmpxchg8b/cmpxchg16b
 16302  // FIXME: On 32-bit, load -> fild or movq would be more efficient
 16303  // (The only way to get a 16-byte load is cmpxchg16b)
 16304  // FIXME: 16-byte ATOMIC_CMP_SWAP isn't actually hooked up at the moment.
 16305  SDValue Zero = DAG.getConstant(0, VT);
 16306  SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
 16307  SDValue Swap =
// NOTE(review): original source line 16308 (the head of the
// DAG.getAtomicCmpSwap(...) call whose arguments follow) is omitted from
// this excerpt — verify against the full source.
 16309  Node->getOperand(0), Node->getOperand(1), Zero, Zero,
 16310  cast<AtomicSDNode>(Node)->getMemOperand(),
 16311  cast<AtomicSDNode>(Node)->getOrdering(),
 16312  cast<AtomicSDNode>(Node)->getOrdering(),
 16313  cast<AtomicSDNode>(Node)->getSynchScope());
// Result 0 is the loaded value; result 2 is the output chain (result 1,
// the i1 success flag, is unused here).
 16314  Results.push_back(Swap.getValue(0));
 16315  Results.push_back(Swap.getValue(2));
 16316 }
SDValue getValue(unsigned R) const
const SDValue & getOperand(unsigned Num) const
SDVTList getVTList(EVT VT)
const DomTreeNodeT * Node
***NAME is the name of the raw_ostream unsigned & i1
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getAtomicCmpSwap(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SynchronizationScope SynchScope)
static SDValue RewriteAsNarrowerShuffle ( ShuffleVectorSDNode SVOp,
SelectionDAG DAG 
)
static

RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be done when every pair / quad of shuffle mask elements point to elements in the right sequence. e.g. vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15>

Definition at line 8562 of file X86ISelLowering.cpp.

// Try to rewrite a shuffle using wider elements (e.g. v8i16 -> v4i32)
// when every Scale-sized group of mask elements addresses a contiguous,
// aligned run in the source. Returns an empty SDValue if any group
// breaks the pattern.
 8563  {
 8564  MVT VT = SVOp->getSimpleValueType(0);
 8565  SDLoc dl(SVOp);
 8566  unsigned NumElems = VT.getVectorNumElements();
 8567  MVT NewVT;
 8568  unsigned Scale;
// Pick the wider element type and how many old elements fold into one.
 8569  switch (VT.SimpleTy) {
 8570  default: llvm_unreachable("Unexpected!");
 8571  case MVT::v2i64:
 8572  case MVT::v2f64:
// Already at the widest granularity; nothing to rewrite.
 8573  return SDValue(SVOp, 0);
 8574  case MVT::v4f32: NewVT = MVT::v2f64; Scale = 2; break;
 8575  case MVT::v4i32: NewVT = MVT::v2i64; Scale = 2; break;
 8576  case MVT::v8i16: NewVT = MVT::v4i32; Scale = 2; break;
 8577  case MVT::v16i8: NewVT = MVT::v4i32; Scale = 4; break;
 8578  case MVT::v16i16: NewVT = MVT::v8i32; Scale = 2; break;
 8579  case MVT::v32i8: NewVT = MVT::v8i32; Scale = 4; break;
 8580  }
 8581 
 8582  SmallVector<int, 8> MaskVec;
 8583  for (unsigned i = 0; i != NumElems; i += Scale) {
// StartIdx is the wide-element index this group maps to; -1 = all undef.
 8584  int StartIdx = -1;
 8585  for (unsigned j = 0; j != Scale; ++j) {
 8586  int EltIdx = SVOp->getMaskElt(i+j);
 8587  if (EltIdx < 0)
 8588  continue;
 8589  if (StartIdx < 0)
 8590  StartIdx = (EltIdx / Scale);
// Every defined element must sit at its exact position within the group.
 8591  if (EltIdx != (int)(StartIdx*Scale + j))
 8592  return SDValue();
 8593  }
 8594  MaskVec.push_back(StartIdx);
 8595  }
 8596 
 8597  SDValue V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, SVOp->getOperand(0));
 8598  SDValue V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, SVOp->getOperand(1));
 8599  return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]);
 8600 }
const SDValue & getOperand(unsigned Num) const
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
SimpleValueType SimpleTy
int getMaskElt(unsigned Idx) const
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
unsigned getVectorNumElements() const
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
MVT getSimpleValueType(unsigned ResNo) const
static bool shouldLowerAsInterleaving ( ArrayRef< int >  Mask)
static

Detect whether the mask pattern should be lowered through interleaving.

This essentially tests whether viewing the mask as an interleaving of two sub-sequences reduces the cross-input traffic of a blend operation. If so, lowering it through interleaving is a significantly better strategy.

Definition at line 7434 of file X86ISelLowering.cpp.

// Decide between interleave-style and split-style lowering for a shuffle
// mask by counting, per input vector, how many elements land in even vs.
// odd positions and low vs. high halves, then comparing the cross-input
// traffic each strategy would incur.
 7434  {
 7435  int NumEvenInputs[2] = {0, 0};
 7436  int NumOddInputs[2] = {0, 0};
 7437  int NumLoInputs[2] = {0, 0};
 7438  int NumHiInputs[2] = {0, 0};
 7439  for (int i = 0, Size = Mask.size(); i < Size; ++i) {
// Undef elements constrain nothing; skip them.
 7440  if (Mask[i] < 0)
 7441  continue;
 7442 
// 0 = element comes from the first input, 1 = from the second.
 7443  int InputIdx = Mask[i] >= Size;
 7444 
 7445  if (i < Size / 2)
 7446  ++NumLoInputs[InputIdx];
 7447  else
 7448  ++NumHiInputs[InputIdx];
 7449 
 7450  if ((i % 2) == 0)
 7451  ++NumEvenInputs[InputIdx];
 7452  else
 7453  ++NumOddInputs[InputIdx];
 7454  }
 7455 
 7456  // The minimum number of cross-input results for both the interleaved and
 7457  // split cases. If interleaving results in fewer cross-input results, return
 7458  // true.
 7459  int InterleavedCrosses = std::min(NumEvenInputs[1] + NumOddInputs[0],
 7460  NumEvenInputs[0] + NumOddInputs[1]);
 7461  int SplitCrosses = std::min(NumLoInputs[1] + NumHiInputs[0],
 7462  NumLoInputs[0] + NumHiInputs[1]);
 7463  return InterleavedCrosses < SplitCrosses;
 7464 }
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:109
static bool ShouldXformToMOVHLPS ( ArrayRef< int >  Mask,
MVT  VT 
)
static

ShouldXformToMOVHLPS - Return true if the node should be transformed to match movhlps. The lower half elements should come from upper half of V1 (and in order), and the upper half elements should come from the upper half of V2 (and in order).

Definition at line 4774 of file X86ISelLowering.cpp.

// Return true if a 4-element, 128-bit shuffle mask matches MOVHLPS:
// elements 0-1 come from the upper half of V1 (indices 2,3) and elements
// 2-3 come from the upper half of V2 (indices 6,7); undefs match anything.
 4774  {
 4775  if (!VT.is128BitVector())
 4776  return false;
 4777  if (VT.getVectorNumElements() != 4)
 4778  return false;
 4779  for (unsigned i = 0, e = 2; i != e; ++i)
 4780  if (!isUndefOrEqual(Mask[i], i+2))
 4781  return false;
 4782  for (unsigned i = 2; i != 4; ++i)
 4783  if (!isUndefOrEqual(Mask[i], i+4))
 4784  return false;
 4785  return true;
 4786 }
static bool isUndefOrEqual(int Val, int CmpVal)
unsigned getVectorNumElements() const
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
static bool ShouldXformToMOVLP ( SDNode V1,
SDNode V2,
ArrayRef< int >  Mask,
MVT  VT 
)
static

ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to match movlp{s|d}. The lower half elements should come from lower half of V1 (and in order), and the upper half elements should come from the upper half of V2 (and in order). And since V1 will become the source of the MOVLP, it must be either a vector load or a scalar load to vector.

Definition at line 4830 of file X86ISelLowering.cpp.

// Return true if the shuffle should become MOVLP{S,D}: 128-bit type, V1
// is (or feeds) a load, V2 is not better served by load-folded SHUFPS,
// low half identity from V1, high half from V2.
 4831  {
 4832  if (!VT.is128BitVector())
 4833  return false;
 4834 
// V1 must be a plain load or a scalar-load-to-vector, since it becomes
// the memory operand of the MOVLP.
 4835  if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
 4836  return false;
 4837  // If V2 is a vector load, don't do this transformation. We will try to use
 4838  // load folding shufps op.
// NOTE(review): original source line 4839 (the condition guarding this
// early return, presumably a WillBeConstantPoolLoad(V2) / load check per
// the surrounding comment) is omitted from this excerpt — verify against
// the full source.
 4840  return false;
 4841 
 4842  unsigned NumElems = VT.getVectorNumElements();
 4843 
 4844  if (NumElems != 2 && NumElems != 4)
 4845  return false;
// Low half: identity from V1; high half: same positions taken from V2.
 4846  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
 4847  if (!isUndefOrEqual(Mask[i], i))
 4848  return false;
 4849  for (unsigned i = NumElems/2, e = NumElems; i != e; ++i)
 4850  if (!isUndefOrEqual(Mask[i], i+NumElems))
 4851  return false;
 4852  return true;
 4853 }
static bool isUndefOrEqual(int Val, int CmpVal)
unsigned getVectorNumElements() const
static bool WillBeConstantPoolLoad(SDNode *N)
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD=nullptr)
bool isNON_EXTLoad(const SDNode *N)
static bool ShuffleCrosses128bitLane ( MVT  VT,
unsigned  InputIdx,
unsigned  OutputIdx 
)
static

In vector type VT, return true if the element at index InputIdx falls on a different 128-bit lane than OutputIdx.

Definition at line 8086 of file X86ISelLowering.cpp.

// True if, in vector type VT, element InputIdx lives in a different
// 128-bit lane than element OutputIdx (lane = bit offset / 128).
 8087  {
 8088  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
 8089  return InputIdx * EltSize / 128 != OutputIdx * EltSize / 128;
 8090 }
unsigned getSizeInBits() const
MVT getVectorElementType() const
STATISTIC ( NumTailCalls  ,
"Number of tail calls"   
)
static SDValue TransformVSELECTtoBlendVECTOR_SHUFFLE ( SDNode N,
SelectionDAG DAG,
const X86Subtarget Subtarget 
)
static

Definition at line 19206 of file X86ISelLowering.cpp.

// Turn a VSELECT with a constant condition into a VECTOR_SHUFFLE blend:
// derive a blend bit per lane from the condition and build the shuffle
// mask selecting each element from LHS or RHS accordingly.
 19207  {
 19208  SDLoc dl(N);
 19209  SDValue Cond = N->getOperand(0);
 19210  SDValue LHS = N->getOperand(1);
 19211  SDValue RHS = N->getOperand(2);
 19212 
// Look through sign_extend(sign_extend_inreg(...)) to the raw condition.
 19213  if (Cond.getOpcode() == ISD::SIGN_EXTEND) {
 19214  SDValue CondSrc = Cond->getOperand(0);
 19215  if (CondSrc->getOpcode() == ISD::SIGN_EXTEND_INREG)
 19216  Cond = CondSrc->getOperand(0);
 19217  }
 19218 
 19219  MVT VT = N->getSimpleValueType(0);
 19220  MVT EltVT = VT.getVectorElementType();
 19221  unsigned NumElems = VT.getVectorNumElements();
 19222  // There is no blend with immediate in AVX-512.
 19223  if (VT.is512BitVector())
 19224  return SDValue();
 19225 
// Immediate blends need SSE4.1 and don't exist for i8 elements;
// v16i16 additionally needs AVX2 (Int256).
 19226  if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
 19227  return SDValue();
 19228  if (!Subtarget->hasInt256() && VT == MVT::v16i16)
 19229  return SDValue();
 19230 
// NOTE(review): original source line 19231 (the condition guarding this
// early return, presumably a check that Cond is a constant build vector)
// is omitted from this excerpt — verify against the full source.
 19232  return SDValue();
 19233 
 19234  unsigned MaskValue = 0;
 19235  if (!BUILD_VECTORtoBlendMask(cast<BuildVectorSDNode>(Cond), MaskValue))
 19236  return SDValue();
 19237 
// Mask entry i selects LHS (i) when bit i of MaskValue is 0, RHS
// (i + NumElems) when it is 1; undef condition lanes stay -1.
 19238  SmallVector<int, 8> ShuffleMask(NumElems, -1);
 19239  for (unsigned i = 0; i < NumElems; ++i) {
 19240  // Be sure we emit undef where we can.
 19241  if (Cond.getOperand(i)->getOpcode() == ISD::UNDEF)
 19242  ShuffleMask[i] = -1;
 19243  else
 19244  ShuffleMask[i] = i + NumElems * ((MaskValue >> i) & 1);
 19245  }
 19246 
 19247  return DAG.getVectorShuffle(VT, dl, LHS, RHS, &ShuffleMask[0]);
 19248 }
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef...
bool is512BitVector() const
is512BitVector - Return true if this is a 512-bit vector type.
unsigned getOpcode() const
bool hasSSE41() const
Definition: X86Subtarget.h:315
const SDValue & getOperand(unsigned Num) const
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
SDNode * getNode() const
get the SDNode which holds the desired result
unsigned getVectorNumElements() const
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
bool hasInt256() const
Definition: X86Subtarget.h:321
static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector, unsigned &MaskValue)
MVT getVectorElementType() const
MVT getSimpleValueType(unsigned ResNo) const
static unsigned TranslateX86CC ( ISD::CondCode  SetCCOpcode,
bool  isFP,
SDValue LHS,
SDValue RHS,
SelectionDAG DAG 
)
static

TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86 specific condition code, returning the condition code and the LHS/RHS of the comparison to make.

Definition at line 3555 of file X86ISelLowering.cpp.

3556  {
3557  if (!isFP) {
3558  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
3559  if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
3560  // X > -1 -> X == 0, jump !sign.
3561  RHS = DAG.getConstant(0, RHS.getValueType());
3562  return X86::COND_NS;
3563  }
3564  if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
3565  // X < 0 -> X == 0, jump on sign.
3566  return X86::COND_S;
3567  }
3568  if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
3569  // X < 1 -> X <= 0
3570  RHS = DAG.getConstant(0, RHS.getValueType());
3571  return X86::COND_LE;
3572  }
3573  }
3574 
3575  switch (SetCCOpcode) {
3576  default: llvm_unreachable("Invalid integer condition!");
3577  case ISD::SETEQ: return X86::COND_E;
3578  case ISD::SETGT: return X86::COND_G;
3579  case ISD::SETGE: return X86::COND_GE;
3580  case ISD::SETLT: return X86::COND_L;
3581  case ISD::SETLE: return X86::COND_LE;
3582  case ISD::SETNE: return X86::COND_NE;
3583  case ISD::SETULT: return X86::COND_B;
3584  case ISD::SETUGT: return X86::COND_A;
3585  case ISD::SETULE: return X86::COND_BE;
3586  case ISD::SETUGE: return X86::COND_AE;
3587  }
3588  }
3589 
3590  // First determine if it is required or is profitable to flip the operands.
3591 
3592  // If LHS is a foldable load, but RHS is not, flip the condition.
3593  if (ISD::isNON_EXTLoad(LHS.getNode()) &&
3594  !ISD::isNON_EXTLoad(RHS.getNode())) {
3595  SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
3596  std::swap(LHS, RHS);
3597  }
3598 
3599  switch (SetCCOpcode) {
3600  default: break;
3601  case ISD::SETOLT:
3602  case ISD::SETOLE:
3603  case ISD::SETUGT:
3604  case ISD::SETUGE:
3605  std::swap(LHS, RHS);
3606  break;
3607  }
3608 
3609  // On a floating point condition, the flags are set as follows:
3610  // ZF PF CF op
3611  // 0 | 0 | 0 | X > Y
3612  // 0 | 0 | 1 | X < Y
3613  // 1 | 0 | 0 | X == Y
3614  // 1 | 1 | 1 | unordered
3615  switch (SetCCOpcode) {
3616  default: llvm_unreachable("Condcode should be pre-legalized away");
3617  case ISD::SETUEQ:
3618  case ISD::SETEQ: return X86::COND_E;
3619  case ISD::SETOLT: // flipped
3620  case ISD::SETOGT:
3621  case ISD::SETGT: return X86::COND_A;
3622  case ISD::SETOLE: // flipped
3623  case ISD::SETOGE:
3624  case ISD::SETGE: return X86::COND_AE;
3625  case ISD::SETUGT: // flipped
3626  case ISD::SETULT:
3627  case ISD::SETLT: return X86::COND_B;
3628  case ISD::SETUGE: // flipped
3629  case ISD::SETULE:
3630  case ISD::SETLE: return X86::COND_BE;
3631  case ISD::SETONE:
3632  case ISD::SETNE: return X86::COND_NE;
3633  case ISD::SETUO: return X86::COND_P;
3634  case ISD::SETO: return X86::COND_NP;
3635  case ISD::SETOEQ:
3636  case ISD::SETUNE: return X86::COND_INVALID;
3637  }
3638 }
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
SDNode * getNode() const
get the SDNode which holds the desired result
CondCode getSetCCSwappedOperands(CondCode Operation)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:590
bool isNON_EXTLoad(const SDNode *N)
EVT getValueType() const
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
static int translateX86FSETCC ( ISD::CondCode  SetCCOpcode,
SDValue Op0,
SDValue Op1 
)
static

  • Turns an ISD::CondCode into a value suitable for SSE floating point mask CMPs.

Definition at line 12027 of file X86ISelLowering.cpp.

12028  {
12029  unsigned SSECC;
12030  bool Swap = false;
12031 
12032  // SSE Condition code mapping:
12033  // 0 - EQ
12034  // 1 - LT
12035  // 2 - LE
12036  // 3 - UNORD
12037  // 4 - NEQ
12038  // 5 - NLT
12039  // 6 - NLE
12040  // 7 - ORD
12041  switch (SetCCOpcode) {
12042  default: llvm_unreachable("Unexpected SETCC condition");
12043  case ISD::SETOEQ:
12044  case ISD::SETEQ: SSECC = 0; break;
12045  case ISD::SETOGT:
12046  case ISD::SETGT: Swap = true; // Fallthrough
12047  case ISD::SETLT:
12048  case ISD::SETOLT: SSECC = 1; break;
12049  case ISD::SETOGE:
12050  case ISD::SETGE: Swap = true; // Fallthrough
12051  case ISD::SETLE:
12052  case ISD::SETOLE: SSECC = 2; break;
12053  case ISD::SETUO: SSECC = 3; break;
12054  case ISD::SETUNE:
12055  case ISD::SETNE: SSECC = 4; break;
12056  case ISD::SETULE: Swap = true; // Fallthrough
12057  case ISD::SETUGE: SSECC = 5; break;
12058  case ISD::SETULT: Swap = true; // Fallthrough
12059  case ISD::SETUGT: SSECC = 6; break;
12060  case ISD::SETO: SSECC = 7; break;
12061  case ISD::SETUEQ:
12062  case ISD::SETONE: SSECC = 8; break;
12063  }
12064  if (Swap)
12065  std::swap(Op0, Op1);
12066 
12067  return SSECC;
12068 }
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:590
static SDValue WidenMaskArithmetic ( SDNode N,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI,
const X86Subtarget Subtarget 
)
static

Definition at line 20511 of file X86ISelLowering.cpp.

20513  {
20514  EVT VT = N->getValueType(0);
20515  if (!VT.is256BitVector())
20516  return SDValue();
20517 
20518  assert((N->getOpcode() == ISD::ANY_EXTEND ||
20519  N->getOpcode() == ISD::ZERO_EXTEND ||
20520  N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node");
20521 
20522  SDValue Narrow = N->getOperand(0);
20523  EVT NarrowVT = Narrow->getValueType(0);
20524  if (!NarrowVT.is128BitVector())
20525  return SDValue();
20526 
20527  if (Narrow->getOpcode() != ISD::XOR &&
20528  Narrow->getOpcode() != ISD::AND &&
20529  Narrow->getOpcode() != ISD::OR)
20530  return SDValue();
20531 
20532  SDValue N0 = Narrow->getOperand(0);
20533  SDValue N1 = Narrow->getOperand(1);
20534  SDLoc DL(Narrow);
20535 
20536  // The Left side has to be a trunc.
20537  if (N0.getOpcode() != ISD::TRUNCATE)
20538  return SDValue();
20539 
20540  // The type of the truncated inputs.
20541  EVT WideVT = N0->getOperand(0)->getValueType(0);
20542  if (WideVT != VT)
20543  return SDValue();
20544 
20545  // The right side has to be a 'trunc' or a constant vector.
20546  bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE;
20547  ConstantSDNode *RHSConstSplat = nullptr;
20548  if (auto *RHSBV = dyn_cast<BuildVectorSDNode>(N1))
20549  RHSConstSplat = RHSBV->getConstantSplatNode();
20550  if (!RHSTrunc && !RHSConstSplat)
20551  return SDValue();
20552 
20553  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20554 
20555  if (!TLI.isOperationLegalOrPromote(Narrow->getOpcode(), WideVT))
20556  return SDValue();
20557 
20558  // Set N0 and N1 to hold the inputs to the new wide operation.
20559  N0 = N0->getOperand(0);
20560  if (RHSConstSplat) {
20561  N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(),
20562  SDValue(RHSConstSplat, 0));
 20563  SmallVector<SDValue, 8> C(WideVT.getVectorNumElements(), N1);
 20564  N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, C);
20565  } else if (RHSTrunc) {
20566  N1 = N1->getOperand(0);
20567  }
20568 
20569  // Generate the wide operation.
20570  SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, WideVT, N0, N1);
20571  unsigned Opcode = N->getOpcode();
20572  switch (Opcode) {
20573  case ISD::ANY_EXTEND:
20574  return Op;
20575  case ISD::ZERO_EXTEND: {
20576  unsigned InBits = NarrowVT.getScalarType().getSizeInBits();
20577  APInt Mask = APInt::getAllOnesValue(InBits);
20578  Mask = Mask.zext(VT.getScalarType().getSizeInBits());
20579  return DAG.getNode(ISD::AND, DL, VT,
20580  Op, DAG.getConstant(Mask, VT));
20581  }
20582  case ISD::SIGN_EXTEND:
20583  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT,
20584  Op, DAG.getValueType(NarrowVT));
20585  default:
20586  llvm_unreachable("Unexpected opcode");
20587  }
20588 }
unsigned getOpcode() const
const SDValue & getOperand(unsigned Num) const
#define llvm_unreachable(msg)
Definition: ErrorHandling.h:98
EVT getValueType(unsigned ResNo) const
EVT getScalarType() const
Definition: ValueTypes.h:211
INITIALIZE_TM_PASS(GlobalMerge,"global-merge","Merge global variables", false, false) bool GlobalMerge const DataLayout * DL
bool isOperationLegalOrPromote(unsigned Op, EVT VT) const
assert(Globals.size() > 1)
bool is256BitVector() const
is256BitVector - Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:141
unsigned getOpcode() const
Class for arbitrary precision integers.
Definition: APInt.h:75
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:362
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:365
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:234
bool is128BitVector() const
is128BitVector - Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:136
SDValue getConstant(uint64_t Val, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getValueType(EVT)
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
APInt LLVM_ATTRIBUTE_UNUSED_RESULT zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:984
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:368
unsigned getVectorNumElements() const
Definition: ValueTypes.h:226
static bool WillBeConstantPoolLoad ( SDNode N)
static

Definition at line 4804 of file X86ISelLowering.cpp.

4804  {
4805  if (N->getOpcode() != ISD::BUILD_VECTOR)
4806  return false;
4807 
4808  // Check for any non-constant elements.
4809  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
4810  switch (N->getOperand(i).getNode()->getOpcode()) {
4811  case ISD::UNDEF:
4812  case ISD::ConstantFP:
4813  case ISD::Constant:
4814  break;
4815  default:
4816  return false;
4817  }
4818 
4819  // Vectors of all-zeros and all-ones are materialized with special
4820  // instructions rather than being loaded.
4821  return !ISD::isBuildVectorAllZeros(N) &&
 4822  !ISD::isBuildVectorAllOnes(N);
 4823 }
unsigned getOpcode() const
unsigned getNumOperands() const
const SDValue & getOperand(unsigned Num) const
bool isBuildVectorAllZeros(const SDNode *N)
UNDEF - An undefined node.
Definition: ISDOpcodes.h:159
SDNode * getNode() const
get the SDNode which holds the desired result
bool isBuildVectorAllOnes(const SDNode *N)
Node predicates.
static SDValue XFormVExtractWithShuffleIntoLoad ( SDNode N,
SelectionDAG DAG,
TargetLowering::DAGCombinerInfo DCI 
)
static

XFormVExtractWithShuffleIntoLoad - Check if a vector extract from a target specific shuffle of a load can be folded into a single element load. Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but shuffles have been custom lowered so we need to handle those here.

Definition at line 18942 of file X86ISelLowering.cpp.

18943  {
18944  if (DCI.isBeforeLegalizeOps())
18945  return SDValue();
18946 
18947  SDValue InVec = N->getOperand(0);
18948  SDValue EltNo = N->getOperand(1);
18949 
18950  if (!isa<ConstantSDNode>(EltNo))
18951  return SDValue();
18952 
18953  EVT VT = InVec.getValueType();
18954 
18955  bool HasShuffleIntoBitcast = false;
18956  if (InVec.getOpcode() == ISD::BITCAST) {
18957  // Don't duplicate a load with other uses.
18958  if (!InVec.hasOneUse())
18959  return SDValue();
18960  EVT BCVT = InVec.getOperand(0).getValueType();
18961  if (BCVT.getVectorNumElements() != VT.getVectorNumElements())
18962  return SDValue();
18963  InVec = InVec.getOperand(0);
18964  HasShuffleIntoBitcast = true;
18965  }
18966 
18967  if (!isTargetShuffle(InVec.getOpcode()))
18968  return SDValue();
18969 
18970  // Don't duplicate a load with other uses.
18971  if (!InVec.hasOneUse())
18972  return SDValue();
18973 
18974  SmallVector<int, 16> ShuffleMask;
18975  bool UnaryShuffle;
18976  if (!getTargetShuffleMask(InVec.getNode(), VT.getSimpleVT(), ShuffleMask,
18977  UnaryShuffle))
18978  return SDValue();
18979 
18980  // Select the input vector, guarding against out of range extract vector.
18981  unsigned NumElems = VT.getVectorNumElements();
18982  int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
18983  int Idx = (Elt > (int)NumElems) ? -1 : ShuffleMask[Elt];
18984  SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0)
18985  : InVec.getOperand(1);
18986 
18987  // If inputs to shuffle are the same for both ops, then allow 2 uses
18988  unsigned AllowedUses = InVec.getOperand(0) == InVec.getOperand(1) ? 2 : 1;
18989 
18990  if (LdNode.getOpcode() == ISD::BITCAST) {
18991  // Don't duplicate a load with other uses.
18992  if (!LdNode.getNode()->hasNUsesOfValue(AllowedUses, 0))
18993  return SDValue();
18994 
18995  AllowedUses = 1; // only allow 1 load use if we have a bitcast
18996  LdNode = LdNode.getOperand(0);
18997  }
18998 
18999  if (!ISD::isNormalLoad(LdNode.getNode()))
19000  return SDValue();
19001 
19002  LoadSDNode *LN0 = cast<LoadSDNode>(LdNode);
19003 
19004  if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
19005  return SDValue();
19006 
19007  if (HasShuffleIntoBitcast) {
19008  // If there's a bitcast before the shuffle, check if the load type and
19009  // alignment is valid.
19010  unsigned Align = LN0->getAlignment();
19011  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19012  unsigned NewAlign = TLI.getDataLayout()->
19013  getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
19014 
19015  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
19016  return SDValue();
19017  }
19018 
19019  // All checks match so transform back to vector_shuffle so that DAG combiner
19020  // can finish the job
19021  SDLoc dl(N);
19022 
19023  // Create shuffle node taking into account the case that its a unary shuffle
19024  SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(VT) : InVec.getOperand(1);
19025  Shuffle = DAG.getVectorShuffle(InVec.getValueType(), dl,
19026  InVec.getOperand(0), Shuffle,
19027  &ShuffleMask[0]);
19028  Shuffle = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
19029  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
19030  EltNo);
19031 }
LLVMContext * getContext() const
Definition: SelectionDAG.h:280
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
bool hasOneUse() const
Type * getTypeForEVT(LLVMContext &Context) const
Definition: ValueTypes.cpp:180
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
SDValue getUNDEF(EVT VT)
getUNDEF - Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:625
static bool isTargetShuffle(unsigned Opcode)
SDNode * getNode() const
get the SDNode which holds the desired result
bool isNormalLoad(const SDNode *N)
const SDValue & getOperand(unsigned i) const
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
const DataLayout * getDataLayout() const
unsigned getOpcode() const
bool isVolatile() const
static cl::opt< AlignMode > Align(cl::desc("Load/store alignment support"), cl::Hidden, cl::init(NoStrictAlign), cl::values(clEnumValN(StrictAlign,"aarch64-strict-align","Disallow all unaligned memory accesses"), clEnumValN(NoStrictAlign,"aarch64-no-strict-align","Allow unaligned memory accesses"), clEnumValEnd))
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT)
void Shuffle(internal::Random *random, std::vector< E > *v)
EVT getValueType() const
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:278
static bool getTargetShuffleMask(SDNode *N, MVT VT, SmallVectorImpl< int > &Mask, bool &IsUnary)
unsigned getAlignment() const
MVT getSimpleVT() const
Definition: ValueTypes.h:204
unsigned getVectorNumElements() const
Definition: ValueTypes.h:226

Variable Documentation

cl::opt<bool> ExperimentalVectorShuffleLowering("x86-experimental-vector-shuffle-lowering", cl::init(false), cl::desc("Enable an experimental vector shuffle lowering code path."), cl::Hidden)
static
cl::opt<bool> ExperimentalVectorWideningLegalization("x86-experimental-vector-widening-legalization", cl::init(false), cl::desc("Enable an experimental vector type legalization through widening ""rather than promotion."), cl::Hidden)
static
std::map< unsigned, IntrinsicData> IntrMap

Definition at line 14397 of file X86ISelLowering.cpp.